#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also switches over automatically to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
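/*
   Usage sketch for the MATAIJ man page above (illustrative only; comm, M, N and the per-row
   nonzero counts 5 and 2 are placeholders, not values taken from this file). Calling both
   preallocation routines, as recommended, lets the same code run whether the communicator
   has one process or many:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          (used on a one-process communicator)
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   (used on a multi-process communicator)
     ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ...
*/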
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
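/*
   Usage sketch for MatGetColumnNorms() as implemented above (illustrative only): the caller
   supplies an array with one entry per global column, since the result is reduced over all
   processes.

     PetscReal *norms;
     PetscInt   N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/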
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
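/*
   Sketch of the redistribution protocol used by this routine (descriptive note; p denotes the
   receiving rank): rank 0 sends, in order, the slice of row lengths ilen[rowners[p]..], the
   nonzero count nz, the column indices, and finally the numerical values; rank p posts the
   matching MPI_Recv calls in the same order (see the else branch below). All messages use the
   single tag obtained from PetscObjectGetNewTag(), so they are matched purely by order on each
   (0,p) pair.
*/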
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
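/*
   Layout note for the copy that follows (descriptive): each row of the received buffer is one
   contiguous run of values ordered by global column, i.e. [ off-diagonal entries left of the
   diagonal block | diagonal-block entries | off-diagonal entries right of the diagonal block ].
   ld[i] holds the number of off-diagonal entries of row i that lie to the left of the diagonal
   block, which is what allows the run to be split between Ao->a and Ad->a without looking at
   column indices again.
*/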
  /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
  ld = ((Mat_MPIAIJ*)(mat->data))->ld;
  ad = Ad->a;
  ao = Ao->a;
  if (mat->rmap->n) {
    i  = 0;
    nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
    nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
  }
  for (i=1; i<mat->rmap->n; i++) {
    nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
    nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
  }
  i--;
  if (mat->rmap->n) {
    nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
  }
  if (rank) {
    ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
  }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process stores an order-N integer array), but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
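/*
   Off-process entries are not communicated at this point (descriptive note): they are copied
   into mat->stash, row-oriented or column-oriented to match the caller's ordering, and are
   exchanged later by MatAssemblyBegin_MPIAIJ()/MatAssemblyEnd_MPIAIJ().
*/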
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false; otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij    = (Mat_MPIAIJ*)mat->data;
  Mat         A       = aij->A; /* diagonal part of the matrix */
  Mat         B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd   = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b      = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart  = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen  = a->ilen,*aj = a->j;
  PetscInt    *bilen  = b->ilen,*bj = b->j;
  PetscInt    am      = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
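/*
   Flow note for the routine below (descriptive): MatAssemblyBegin_MPIAIJ() has already posted
   the stash sends; the loop below drains the received messages, groups consecutive entries
   sharing a row, and feeds them back through MatSetValues_MPIAIJ(). Afterwards the diagonal
   (A) and off-diagonal (B) blocks are assembled and, on the first final assembly, the
   off-process column map and scatter are built by MatSetUpMultiply_MPIAIJ().
*/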
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 } 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 
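/*
   Typical use of the row-zeroing routine above (a sketch; the row indices and diagonal value
   are illustrative):

     PetscInt rows[] = {0, 7};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   With x and b provided, b is reset to diag*x at the zeroed rows, as done above; rows may be
   listed by any rank and are routed to their owners by MatZeroRowsMapLocal_Private().
*/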
999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if 
(PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 1114 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 VecScatter Mvctx = a->Mvctx; 1136 1137 PetscFunctionBegin; 1138 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1139 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1156 /* add partial results together */ 1157 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1163 { 1164 MPI_Comm comm; 1165 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1166 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1167 IS Me,Notme; 1168 PetscErrorCode ierr; 1169 PetscInt M,N,first,last,*notme,i; 1170 PetscBool lf; 1171 PetscMPIInt size; 1172 1173 PetscFunctionBegin; 1174 /* Easy test: symmetric diagonal block */ 1175 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1176 ierr = 
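/*
   Descriptive note on the multiply kernels above: they all follow the pattern
     y = A_diag * x_local + B_offdiag * x_ghost
   where the ghost values (lvec) are filled by the Mvctx scatter, and the scatter is started
   before the diagonal-block product so that communication overlaps computation.
*/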
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1177 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1178 if (!*f) PetscFunctionReturn(0); 1179 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1181 if (size == 1) PetscFunctionReturn(0); 1182 1183 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1184 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1185 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1186 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1187 for (i=0; i<first; i++) notme[i] = i; 1188 for (i=last; i<M; i++) notme[i-last+first] = i; 1189 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1190 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1191 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1192 Aoff = Aoffs[0]; 1193 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1194 Boff = Boffs[0]; 1195 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1197 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1199 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1200 ierr = PetscFree(notme);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1205 { 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 /* do nondiagonal part */ 1220 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1221 /* do local part */ 1222 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1223 /* add partial results together */ 1224 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /* 1230 This only works correctly for square matrices where the subblock A->A is the 1231 diagonal block 1232 */ 1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1234 { 1235 PetscErrorCode ierr; 1236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1246 { 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1252 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 #if defined(PETSC_USE_LOG) 1263 
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1264 #endif 1265 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1266 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1268 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1269 #if defined(PETSC_USE_CTABLE) 1270 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1271 #else 1272 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1273 #endif 1274 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1275 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1276 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1277 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1278 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1279 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1280 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1281 1282 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1292 #if defined(PETSC_HAVE_ELEMENTAL) 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1294 #endif 1295 #if defined(PETSC_HAVE_HYPRE) 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1298 #endif 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1306 { 1307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1308 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1309 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1310 const PetscInt *garray = aij->garray; 1311 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1312 PetscInt *rowlens; 1313 PetscInt *colidxs; 1314 PetscScalar *matvals; 1315 PetscErrorCode ierr; 1316 1317 PetscFunctionBegin; 1318 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1319 1320 M = mat->rmap->N; 1321 N = mat->cmap->N; 1322 m = mat->rmap->n; 1323 rs = mat->rmap->rstart; 1324 cs = mat->cmap->rstart; 1325 nz = A->nz + B->nz; 1326 1327 /* write matrix header */ 1328 header[0] = MAT_FILE_CLASSID; 1329 header[1] = M; header[2] = N; header[3] = nz; 1330 ierr = 
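/*
   Descriptive note on the binary format written below: a 4-entry header
   [MAT_FILE_CLASSID, M, N, global nz], then the row lengths, then all column indices, then all
   numerical values; within each row the off-diagonal entries are merged around the
   diagonal-block entries so the columns appear in ascending global order.
*/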
MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1331 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1332 1333 /* fill in and store row lengths */ 1334 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1335 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1336 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1337 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1338 1339 /* fill in and store column indices */ 1340 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1341 for (cnt=0, i=0; i<m; i++) { 1342 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1343 if (garray[B->j[jb]] > cs) break; 1344 colidxs[cnt++] = garray[B->j[jb]]; 1345 } 1346 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1347 colidxs[cnt++] = A->j[ja] + cs; 1348 for (; jb<B->i[i+1]; jb++) 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1352 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1353 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1354 1355 /* fill in and store nonzero values */ 1356 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1357 for (cnt=0, i=0; i<m; i++) { 1358 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1359 if (garray[B->j[jb]] > cs) break; 1360 matvals[cnt++] = B->a[jb]; 1361 } 1362 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1363 matvals[cnt++] = A->a[ja]; 1364 for (; jb<B->i[i+1]; jb++) 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1368 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1369 ierr = PetscFree(matvals);CHKERRQ(ierr); 1370 1371 /* write block size option to the viewer's .info file */ 1372 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1373 PetscFunctionReturn(0); 1374 } 1375 1376 #include <petscdraw.h> 1377 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1378 { 1379 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1380 PetscErrorCode ierr; 1381 PetscMPIInt rank = aij->rank,size = aij->size; 1382 PetscBool isdraw,iascii,isbinary; 1383 PetscViewer sviewer; 1384 PetscViewerFormat format; 1385 1386 PetscFunctionBegin; 1387 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1388 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1390 if (iascii) { 1391 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1392 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1393 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1394 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1395 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1396 for (i=0; i<(PetscInt)size; i++) { 1397 nmax = PetscMax(nmax,nz[i]); 1398 nmin = PetscMin(nmin,nz[i]); 1399 navg += nz[i]; 1400 } 1401 ierr = PetscFree(nz);CHKERRQ(ierr); 1402 navg = navg/size; 1403 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1404 PetscFunctionReturn(0); 1405 } 1406 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1407 if 
(format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1408 MatInfo info; 1409 PetscBool inodes; 1410 1411 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1412 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1413 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1414 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1415 if (!inodes) { 1416 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1417 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1418 } else { 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1420 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1421 } 1422 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1423 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1424 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1425 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1426 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1429 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1430 PetscFunctionReturn(0); 1431 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1432 PetscInt inodecount,inodelimit,*inodes; 1433 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1434 if (inodes) { 1435 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1436 } else { 1437 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1438 } 1439 PetscFunctionReturn(0); 1440 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1441 PetscFunctionReturn(0); 1442 } 1443 } else if (isbinary) { 1444 if (size == 1) { 1445 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1446 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1447 } else { 1448 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1449 } 1450 PetscFunctionReturn(0); 1451 } else if (iascii && size == 1) { 1452 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1453 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1454 PetscFunctionReturn(0); 1455 } else if (isdraw) { 1456 PetscDraw draw; 1457 PetscBool isnull; 1458 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1459 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1460 if (isnull) PetscFunctionReturn(0); 1461 } 1462 1463 { /* assemble the entire matrix onto first processor */ 1464 Mat A = NULL, Av; 1465 IS isrow,iscol; 1466 1467 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1468 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1469 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1470 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1471 /* The commented code uses MatCreateSubMatrices instead */ 1472 /* 1473 Mat *AA, A = NULL, Av; 1474 IS isrow,iscol; 1475 1476 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1477 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1478 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1479 if (!rank) { 1480 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1481 A = AA[0]; 1482 Av = AA[0]; 1483 } 1484 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1485 */ 1486 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1487 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1488 /* 1489 Everyone has to call to draw the matrix since the graphics waits are 1490 synchronized across all processors that share the PetscDraw object 1491 */ 1492 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1493 if (!rank) { 1494 if (((PetscObject)mat)->name) { 1495 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1496 } 1497 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1498 } 1499 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1500 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1501 ierr = MatDestroy(&A);CHKERRQ(ierr); 1502 } 1503 PetscFunctionReturn(0); 1504 } 1505 1506 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1507 { 1508 PetscErrorCode ierr; 1509 PetscBool iascii,isdraw,issocket,isbinary; 1510 1511 PetscFunctionBegin; 1512 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1513 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1514 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1516 if (iascii || isdraw || isbinary || issocket) { 1517 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1518 } 1519 PetscFunctionReturn(0); 1520 } 1521 1522 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1523 { 1524 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1525 PetscErrorCode ierr; 1526 Vec bb1 = 0; 1527 PetscBool hasop; 1528 1529 PetscFunctionBegin; 1530 if (flag == SOR_APPLY_UPPER) { 1531 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1532 PetscFunctionReturn(0); 1533 } 1534 1535 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1536 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1537 } 1538 1539 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1540 if (flag & SOR_ZERO_INITIAL_GUESS) { 1541 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1542 its--; 1543 } 1544 1545 while (its--) { 1546 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1547 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1548 1549 /* update rhs: bb1 = bb - B*x */ 1550 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1551 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1552 1553 
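      /*
         Note on the two statements above: mat->lvec holds the ghosted (off-process) entries of xx
         and was scaled by -1, so the MatMultAdd on mat->B computes bb1 = bb - B*x_ghost.  The
         local sweep below then relaxes the diagonal block A with this modified right-hand side,
         i.e. couplings to other processes are lagged by one iteration (block Jacobi between
         processes, SOR within each process).
      */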
/* local sweep */ 1554 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1555 } 1556 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1557 if (flag & SOR_ZERO_INITIAL_GUESS) { 1558 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1559 its--; 1560 } 1561 while (its--) { 1562 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1563 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1564 1565 /* update rhs: bb1 = bb - B*x */ 1566 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1567 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1568 1569 /* local sweep */ 1570 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1571 } 1572 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1573 if (flag & SOR_ZERO_INITIAL_GUESS) { 1574 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1575 its--; 1576 } 1577 while (its--) { 1578 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1579 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1580 1581 /* update rhs: bb1 = bb - B*x */ 1582 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1583 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1584 1585 /* local sweep */ 1586 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1587 } 1588 } else if (flag & SOR_EISENSTAT) { 1589 Vec xx1; 1590 1591 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1592 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1593 1594 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1595 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1596 if (!mat->diag) { 1597 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1598 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1599 } 1600 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1601 if (hasop) { 1602 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1603 } else { 1604 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1605 } 1606 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1607 1608 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1609 1610 /* local sweep */ 1611 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1612 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1613 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1614 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1615 1616 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1617 1618 matin->factorerrortype = mat->A->factorerrortype; 1619 PetscFunctionReturn(0); 1620 } 1621 1622 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1623 { 1624 Mat aA,aB,Aperm; 1625 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1626 PetscScalar *aa,*ba; 1627 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1628 PetscSF rowsf,sf; 1629 IS parcolp = NULL; 1630 PetscBool done; 1631 PetscErrorCode ierr; 1632 1633 PetscFunctionBegin; 1634 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1635 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1636 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1637 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1638 1639 /* Invert row permutation to find out where my rows should go */ 1640 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1641 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1642 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1643 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1644 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1645 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1646 1647 /* Invert column permutation to find out where my columns should go */ 1648 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1649 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1650 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1651 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1652 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1653 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1654 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1655 1656 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1657 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1658 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1659 1660 /* Find out where my gcols should go */ 1661 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1662 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1663 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1664 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1665 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1666 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1667 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1669 1670 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1671 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1672 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1673 for (i=0; i<m; i++) { 1674 PetscInt row = rdest[i]; 1675 PetscMPIInt rowner; 1676 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1677 for (j=ai[i]; j<ai[i+1]; j++) { 1678 PetscInt col = cdest[aj[j]]; 1679 PetscMPIInt cowner; 1680 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1681 if (rowner == cowner) dnnz[i]++; 1682 else onnz[i]++; 1683 } 1684 for (j=bi[i]; j<bi[i+1]; j++) { 1685 PetscInt col = gcdest[bj[j]]; 1686 PetscMPIInt cowner; 1687 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1688 if (rowner == cowner) dnnz[i]++; 1689 else onnz[i]++; 1690 } 1691 } 1692 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1693 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1694 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1695 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1696 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1697 1698 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1699 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1700 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1701 for (i=0; i<m; i++) { 1702 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1703 PetscInt j0,rowlen; 1704 rowlen = ai[i+1] - ai[i]; 1705 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1706 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1707 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1708 } 1709 rowlen = bi[i+1] - bi[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 } 1715 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1716 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1717 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1718 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1719 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1720 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1721 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1722 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1723 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1724 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1725 *B = Aperm; 1726 PetscFunctionReturn(0); 1727 } 1728 1729 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1730 { 1731 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1732 PetscErrorCode ierr; 1733 1734 PetscFunctionBegin; 1735 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1736 if (ghosts) *ghosts = aij->garray; 1737 PetscFunctionReturn(0); 1738 } 1739 1740 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1741 { 1742 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1743 Mat A = mat->A,B = mat->B; 1744 PetscErrorCode ierr; 1745 PetscLogDouble isend[5],irecv[5]; 1746 1747 PetscFunctionBegin; 1748 info->block_size = 1.0; 1749 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1750 1751 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1752 isend[3] = info->memory; isend[4] = info->mallocs; 1753 1754 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1757 isend[3] += info->memory; isend[4] += info->mallocs; 1758 if (flag == MAT_LOCAL) { 1759 info->nz_used = isend[0]; 1760 info->nz_allocated = isend[1]; 1761 info->nz_unneeded = isend[2]; 1762 info->memory = isend[3]; 1763 info->mallocs = isend[4]; 1764 } else if (flag == MAT_GLOBAL_MAX) { 1765 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1766 1767 info->nz_used = irecv[0]; 1768 info->nz_allocated = irecv[1]; 1769 info->nz_unneeded = irecv[2]; 1770 info->memory = irecv[3]; 1771 info->mallocs = irecv[4]; 1772 } else if (flag == MAT_GLOBAL_SUM) { 1773 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1774 1775 info->nz_used = irecv[0]; 1776 info->nz_allocated = irecv[1]; 1777 info->nz_unneeded = irecv[2]; 1778 info->memory = irecv[3]; 1779 info->mallocs = irecv[4]; 1780 } 1781 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1782 info->fill_ratio_needed = 0; 1783 info->factor_mallocs = 0; 1784 PetscFunctionReturn(0); 1785 } 
1786 1787 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1788 { 1789 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1790 PetscErrorCode ierr; 1791 1792 PetscFunctionBegin; 1793 switch (op) { 1794 case MAT_NEW_NONZERO_LOCATIONS: 1795 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1796 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1797 case MAT_KEEP_NONZERO_PATTERN: 1798 case MAT_NEW_NONZERO_LOCATION_ERR: 1799 case MAT_USE_INODES: 1800 case MAT_IGNORE_ZERO_ENTRIES: 1801 MatCheckPreallocated(A,1); 1802 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1803 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1804 break; 1805 case MAT_ROW_ORIENTED: 1806 MatCheckPreallocated(A,1); 1807 a->roworiented = flg; 1808 1809 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1810 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1811 break; 1812 case MAT_NEW_DIAGONALS: 1813 case MAT_SORTED_FULL: 1814 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1815 break; 1816 case MAT_IGNORE_OFF_PROC_ENTRIES: 1817 a->donotstash = flg; 1818 break; 1819 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1820 case MAT_SPD: 1821 case MAT_SYMMETRIC: 1822 case MAT_STRUCTURALLY_SYMMETRIC: 1823 case MAT_HERMITIAN: 1824 case MAT_SYMMETRY_ETERNAL: 1825 break; 1826 case MAT_SUBMAT_SINGLEIS: 1827 A->submat_singleis = flg; 1828 break; 1829 case MAT_STRUCTURE_ONLY: 1830 /* The option is handled directly by MatSetOption() */ 1831 break; 1832 default: 1833 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1839 { 1840 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1841 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1842 PetscErrorCode ierr; 1843 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1844 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1845 PetscInt *cmap,*idx_p; 1846 1847 PetscFunctionBegin; 1848 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1849 mat->getrowactive = PETSC_TRUE; 1850 1851 if (!mat->rowvalues && (idx || v)) { 1852 /* 1853 allocate enough space to hold information from the longest row. 
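         The scan below records, over all local rows, the largest combined nonzero count of the
         diagonal block A and the off-diagonal block B; one value buffer and one index buffer of
         that length are allocated once and reused by every subsequent MatGetRow() on this matrix.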
1854 */ 1855 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1856 PetscInt max = 1,tmp; 1857 for (i=0; i<matin->rmap->n; i++) { 1858 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1859 if (max < tmp) max = tmp; 1860 } 1861 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1862 } 1863 1864 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1865 lrow = row - rstart; 1866 1867 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1868 if (!v) {pvA = 0; pvB = 0;} 1869 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1870 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1871 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1872 nztot = nzA + nzB; 1873 1874 cmap = mat->garray; 1875 if (v || idx) { 1876 if (nztot) { 1877 /* Sort by increasing column numbers, assuming A and B already sorted */ 1878 PetscInt imark = -1; 1879 if (v) { 1880 *v = v_p = mat->rowvalues; 1881 for (i=0; i<nzB; i++) { 1882 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1883 else break; 1884 } 1885 imark = i; 1886 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1887 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1888 } 1889 if (idx) { 1890 *idx = idx_p = mat->rowindices; 1891 if (imark > -1) { 1892 for (i=0; i<imark; i++) { 1893 idx_p[i] = cmap[cworkB[i]]; 1894 } 1895 } else { 1896 for (i=0; i<nzB; i++) { 1897 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1898 else break; 1899 } 1900 imark = i; 1901 } 1902 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1903 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1904 } 1905 } else { 1906 if (idx) *idx = 0; 1907 if (v) *v = 0; 1908 } 1909 } 1910 *nz = nztot; 1911 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1912 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1917 { 1918 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1919 1920 PetscFunctionBegin; 1921 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1922 aij->getrowactive = PETSC_FALSE; 1923 PetscFunctionReturn(0); 1924 } 1925 1926 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1927 { 1928 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1929 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1930 PetscErrorCode ierr; 1931 PetscInt i,j,cstart = mat->cmap->rstart; 1932 PetscReal sum = 0.0; 1933 MatScalar *v; 1934 1935 PetscFunctionBegin; 1936 if (aij->size == 1) { 1937 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1938 } else { 1939 if (type == NORM_FROBENIUS) { 1940 v = amat->a; 1941 for (i=0; i<amat->nz; i++) { 1942 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1943 } 1944 v = bmat->a; 1945 for (i=0; i<bmat->nz; i++) { 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 } 1948 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1949 *norm = PetscSqrtReal(*norm); 1950 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1951 } else if (type == NORM_1) { /* max column norm */ 1952 PetscReal *tmp,*tmp2; 1953 PetscInt *jj,*garray = aij->garray; 1954 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1955 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1956 *norm = 0.0; 
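      /*
         The 1-norm is the largest absolute column sum, ||A||_1 = max_j sum_i |a_ij|.  Each
         process accumulates partial column sums for its own rows: entries of the diagonal block
         are shifted by cstart into the owned global column range, entries of the off-diagonal
         block are mapped to global columns through garray.  The MPIU_Allreduce below adds the
         partial sums and the final loop takes the maximum over all global columns.
      */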
1957 v = amat->a; jj = amat->j; 1958 for (j=0; j<amat->nz; j++) { 1959 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1960 } 1961 v = bmat->a; jj = bmat->j; 1962 for (j=0; j<bmat->nz; j++) { 1963 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1964 } 1965 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1966 for (j=0; j<mat->cmap->N; j++) { 1967 if (tmp2[j] > *norm) *norm = tmp2[j]; 1968 } 1969 ierr = PetscFree(tmp);CHKERRQ(ierr); 1970 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1971 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1972 } else if (type == NORM_INFINITY) { /* max row norm */ 1973 PetscReal ntemp = 0.0; 1974 for (j=0; j<aij->A->rmap->n; j++) { 1975 v = amat->a + amat->i[j]; 1976 sum = 0.0; 1977 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1978 sum += PetscAbsScalar(*v); v++; 1979 } 1980 v = bmat->a + bmat->i[j]; 1981 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1982 sum += PetscAbsScalar(*v); v++; 1983 } 1984 if (sum > ntemp) ntemp = sum; 1985 } 1986 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1987 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1988 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1989 } 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1994 { 1995 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1996 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1997 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 1998 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 1999 PetscErrorCode ierr; 2000 Mat B,A_diag,*B_diag; 2001 const MatScalar *array; 2002 2003 PetscFunctionBegin; 2004 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2005 ai = Aloc->i; aj = Aloc->j; 2006 bi = Bloc->i; bj = Bloc->j; 2007 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2008 PetscInt *d_nnz,*g_nnz,*o_nnz; 2009 PetscSFNode *oloc; 2010 PETSC_UNUSED PetscSF sf; 2011 2012 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2013 /* compute d_nnz for preallocation */ 2014 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2015 for (i=0; i<ai[ma]; i++) { 2016 d_nnz[aj[i]]++; 2017 } 2018 /* compute local off-diagonal contributions */ 2019 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2020 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2021 /* map those to global */ 2022 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2023 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2024 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2025 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2026 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2027 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2028 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2029 2030 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2031 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2032 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2033 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2034 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2035 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2036 } else { 2037 B = *matout; 2038 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2039 } 2040 2041 b = (Mat_MPIAIJ*)B->data; 2042 A_diag = a->A; 2043 B_diag = &b->A; 2044 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2045 A_diag_ncol = A_diag->cmap->N; 2046 B_diag_ilen = sub_B_diag->ilen; 2047 B_diag_i = sub_B_diag->i; 2048 2049 /* Set ilen for diagonal of B */ 2050 for (i=0; i<A_diag_ncol; i++) { 2051 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2052 } 2053 2054 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2055 very quickly (=without using MatSetValues), because all writes are local. */ 2056 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2057 2058 /* copy over the B part */ 2059 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2060 array = Bloc->a; 2061 row = A->rmap->rstart; 2062 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2063 cols_tmp = cols; 2064 for (i=0; i<mb; i++) { 2065 ncol = bi[i+1]-bi[i]; 2066 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2067 row++; 2068 array += ncol; cols_tmp += ncol; 2069 } 2070 ierr = PetscFree(cols);CHKERRQ(ierr); 2071 2072 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2073 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2074 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2075 *matout = B; 2076 } else { 2077 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2078 } 2079 PetscFunctionReturn(0); 2080 } 2081 2082 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2083 { 2084 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2085 Mat a = aij->A,b = aij->B; 2086 PetscErrorCode ierr; 2087 PetscInt s1,s2,s3; 2088 2089 PetscFunctionBegin; 2090 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2091 if (rr) { 2092 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2093 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2094 /* Overlap communication with computation. 
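         Only the scatter of rr into aij->lvec is started here; the left scaling of the
         off-diagonal block and the scaling of the diagonal block proceed while the messages are
         in flight, and the VecScatterEnd() below completes the communication just before the
         off-diagonal block is right-scaled with the ghosted values.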
*/ 2095 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2096 } 2097 if (ll) { 2098 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2099 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2100 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2101 } 2102 /* scale the diagonal block */ 2103 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2104 2105 if (rr) { 2106 /* Do a scatter end and then right scale the off-diagonal block */ 2107 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2108 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2114 { 2115 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2116 PetscErrorCode ierr; 2117 2118 PetscFunctionBegin; 2119 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2124 { 2125 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2126 Mat a,b,c,d; 2127 PetscBool flg; 2128 PetscErrorCode ierr; 2129 2130 PetscFunctionBegin; 2131 a = matA->A; b = matA->B; 2132 c = matB->A; d = matB->B; 2133 2134 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2135 if (flg) { 2136 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2137 } 2138 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2143 { 2144 PetscErrorCode ierr; 2145 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2146 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2147 2148 PetscFunctionBegin; 2149 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2150 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2151 /* because of the column compression in the off-processor part of the matrix a->B, 2152 the number of columns in a->B and b->B may be different, hence we cannot call 2153 the MatCopy() directly on the two parts. If need be, we can provide a more 2154 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2155 then copying the submatrices */ 2156 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2157 } else { 2158 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2159 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2160 } 2161 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2162 PetscFunctionReturn(0); 2163 } 2164 2165 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2166 { 2167 PetscErrorCode ierr; 2168 2169 PetscFunctionBegin; 2170 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2171 PetscFunctionReturn(0); 2172 } 2173 2174 /* 2175 Computes the number of nonzeros per row needed for preallocation when X and Y 2176 have different nonzero structure. 
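   Each row is a merge of two sorted lists of global column indices, so nnz[i] is the size of
   their union.  For example (a hypothetical row), if X contributes global columns {1,4,7} and
   Y contributes {2,4,9}, the union {1,2,4,7,9} gives nnz[i] = 5.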
2177 */ 2178 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2179 { 2180 PetscInt i,j,k,nzx,nzy; 2181 2182 PetscFunctionBegin; 2183 /* Set the number of nonzeros in the new matrix */ 2184 for (i=0; i<m; i++) { 2185 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2186 nzx = xi[i+1] - xi[i]; 2187 nzy = yi[i+1] - yi[i]; 2188 nnz[i] = 0; 2189 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2190 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2191 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2192 nnz[i]++; 2193 } 2194 for (; k<nzy; k++) nnz[i]++; 2195 } 2196 PetscFunctionReturn(0); 2197 } 2198 2199 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2200 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2201 { 2202 PetscErrorCode ierr; 2203 PetscInt m = Y->rmap->N; 2204 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2205 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2206 2207 PetscFunctionBegin; 2208 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2209 PetscFunctionReturn(0); 2210 } 2211 2212 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2213 { 2214 PetscErrorCode ierr; 2215 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2216 PetscBLASInt bnz,one=1; 2217 Mat_SeqAIJ *x,*y; 2218 2219 PetscFunctionBegin; 2220 if (str == SAME_NONZERO_PATTERN) { 2221 PetscScalar alpha = a; 2222 x = (Mat_SeqAIJ*)xx->A->data; 2223 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2224 y = (Mat_SeqAIJ*)yy->A->data; 2225 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2226 x = (Mat_SeqAIJ*)xx->B->data; 2227 y = (Mat_SeqAIJ*)yy->B->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2230 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2231 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2232 will be updated */ 2233 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2234 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2235 Y->offloadmask = PETSC_OFFLOAD_CPU; 2236 } 2237 #endif 2238 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2239 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2240 } else { 2241 Mat B; 2242 PetscInt *nnz_d,*nnz_o; 2243 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2244 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2245 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2246 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2247 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2248 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2249 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2250 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2251 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2252 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2253 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2254 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2255 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row */
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*) A->data;
  PetscInt       n = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2473 A->factorerrortype = a->A->factorerrortype; 2474 PetscFunctionReturn(0); 2475 } 2476 2477 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2478 { 2479 PetscErrorCode ierr; 2480 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2481 2482 PetscFunctionBegin; 2483 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2484 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2485 if (x->assembled) { 2486 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2487 } else { 2488 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2489 } 2490 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2491 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2492 PetscFunctionReturn(0); 2493 } 2494 2495 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2496 { 2497 PetscFunctionBegin; 2498 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2499 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2500 PetscFunctionReturn(0); 2501 } 2502 2503 /*@ 2504 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2505 2506 Collective on Mat 2507 2508 Input Parameters: 2509 + A - the matrix 2510 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2511 2512 Level: advanced 2513 2514 @*/ 2515 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2516 { 2517 PetscErrorCode ierr; 2518 2519 PetscFunctionBegin; 2520 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2521 PetscFunctionReturn(0); 2522 } 2523 2524 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2525 { 2526 PetscErrorCode ierr; 2527 PetscBool sc = PETSC_FALSE,flg; 2528 2529 PetscFunctionBegin; 2530 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2531 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2532 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2533 if (flg) { 2534 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2535 } 2536 ierr = PetscOptionsTail();CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2541 { 2542 PetscErrorCode ierr; 2543 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2544 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2545 2546 PetscFunctionBegin; 2547 if (!Y->preallocated) { 2548 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2549 } else if (!aij->nz) { 2550 PetscInt nonew = aij->nonew; 2551 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2552 aij->nonew = nonew; 2553 } 2554 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2555 PetscFunctionReturn(0); 2556 } 2557 2558 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2559 { 2560 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2561 PetscErrorCode ierr; 2562 2563 PetscFunctionBegin; 2564 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2565 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2566 if (d) { 2567 PetscInt rstart; 
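    /* MatMissingDiagonal() on the sequential diagonal block returns a local row index in *d;
       the ownership range start is added below so the caller receives a global row number */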
2568 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2569 *d += rstart; 2570 2571 } 2572 PetscFunctionReturn(0); 2573 } 2574 2575 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2576 { 2577 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2578 PetscErrorCode ierr; 2579 2580 PetscFunctionBegin; 2581 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 /* -------------------------------------------------------------------*/ 2586 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2587 MatGetRow_MPIAIJ, 2588 MatRestoreRow_MPIAIJ, 2589 MatMult_MPIAIJ, 2590 /* 4*/ MatMultAdd_MPIAIJ, 2591 MatMultTranspose_MPIAIJ, 2592 MatMultTransposeAdd_MPIAIJ, 2593 0, 2594 0, 2595 0, 2596 /*10*/ 0, 2597 0, 2598 0, 2599 MatSOR_MPIAIJ, 2600 MatTranspose_MPIAIJ, 2601 /*15*/ MatGetInfo_MPIAIJ, 2602 MatEqual_MPIAIJ, 2603 MatGetDiagonal_MPIAIJ, 2604 MatDiagonalScale_MPIAIJ, 2605 MatNorm_MPIAIJ, 2606 /*20*/ MatAssemblyBegin_MPIAIJ, 2607 MatAssemblyEnd_MPIAIJ, 2608 MatSetOption_MPIAIJ, 2609 MatZeroEntries_MPIAIJ, 2610 /*24*/ MatZeroRows_MPIAIJ, 2611 0, 2612 0, 2613 0, 2614 0, 2615 /*29*/ MatSetUp_MPIAIJ, 2616 0, 2617 0, 2618 MatGetDiagonalBlock_MPIAIJ, 2619 0, 2620 /*34*/ MatDuplicate_MPIAIJ, 2621 0, 2622 0, 2623 0, 2624 0, 2625 /*39*/ MatAXPY_MPIAIJ, 2626 MatCreateSubMatrices_MPIAIJ, 2627 MatIncreaseOverlap_MPIAIJ, 2628 MatGetValues_MPIAIJ, 2629 MatCopy_MPIAIJ, 2630 /*44*/ MatGetRowMax_MPIAIJ, 2631 MatScale_MPIAIJ, 2632 MatShift_MPIAIJ, 2633 MatDiagonalSet_MPIAIJ, 2634 MatZeroRowsColumns_MPIAIJ, 2635 /*49*/ MatSetRandom_MPIAIJ, 2636 0, 2637 0, 2638 0, 2639 0, 2640 /*54*/ MatFDColoringCreate_MPIXAIJ, 2641 0, 2642 MatSetUnfactored_MPIAIJ, 2643 MatPermute_MPIAIJ, 2644 0, 2645 /*59*/ MatCreateSubMatrix_MPIAIJ, 2646 MatDestroy_MPIAIJ, 2647 MatView_MPIAIJ, 2648 0, 2649 0, 2650 /*64*/ 0, 2651 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2652 0, 2653 0, 2654 0, 2655 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2656 MatGetRowMinAbs_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 0, 2661 /*75*/ MatFDColoringApply_AIJ, 2662 MatSetFromOptions_MPIAIJ, 2663 0, 2664 0, 2665 MatFindZeroDiagonals_MPIAIJ, 2666 /*80*/ 0, 2667 0, 2668 0, 2669 /*83*/ MatLoad_MPIAIJ, 2670 MatIsSymmetric_MPIAIJ, 2671 0, 2672 0, 2673 0, 2674 0, 2675 /*89*/ 0, 2676 0, 2677 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2678 0, 2679 0, 2680 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 MatBindToCPU_MPIAIJ, 2685 /*99*/ MatProductSetFromOptions_MPIAIJ, 2686 0, 2687 0, 2688 MatConjugate_MPIAIJ, 2689 0, 2690 /*104*/MatSetValuesRow_MPIAIJ, 2691 MatRealPart_MPIAIJ, 2692 MatImaginaryPart_MPIAIJ, 2693 0, 2694 0, 2695 /*109*/0, 2696 0, 2697 MatGetRowMin_MPIAIJ, 2698 0, 2699 MatMissingDiagonal_MPIAIJ, 2700 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2701 0, 2702 MatGetGhosts_MPIAIJ, 2703 0, 2704 0, 2705 /*119*/0, 2706 0, 2707 0, 2708 0, 2709 MatGetMultiProcBlock_MPIAIJ, 2710 /*124*/MatFindNonzeroRows_MPIAIJ, 2711 MatGetColumnNorms_MPIAIJ, 2712 MatInvertBlockDiagonal_MPIAIJ, 2713 MatInvertVariableBlockDiagonal_MPIAIJ, 2714 MatCreateSubMatricesMPI_MPIAIJ, 2715 /*129*/0, 2716 0, 2717 0, 2718 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2719 0, 2720 /*134*/0, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /*139*/MatSetBlockSizes_MPIAIJ, 2726 0, 2727 0, 2728 MatFDColoringSetUp_MPIXAIJ, 2729 MatFindOffBlockDiagonalEntries_MPIAIJ, 2730 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2731 /*145*/0, 2732 0, 2733 0 2734 }; 2735 2736 /* 
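   The table above is the function-pointer dispatch table for MATMPIAIJ: slot k holds the
   implementation of the k-th MatOperation, and a 0 entry means the operation is not provided by
   this matrix type.  The public interface forwards through these pointers, e.g. MatMult() ends up
   in slot 3 (MatMult_MPIAIJ).  A minimal sketch of that mechanism, not the actual body of
   MatMult(), which also checks arguments and logs events:

       PetscErrorCode MatMult(Mat A,Vec x,Vec y)
       {
         PetscErrorCode ierr;
         ierr = (*A->ops->mult)(A,x,y);CHKERRQ(ierr);
         return 0;
       }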
----------------------------------------------------------------------------------------*/ 2737 2738 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2739 { 2740 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2741 PetscErrorCode ierr; 2742 2743 PetscFunctionBegin; 2744 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2745 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2750 { 2751 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2752 PetscErrorCode ierr; 2753 2754 PetscFunctionBegin; 2755 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2756 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2757 PetscFunctionReturn(0); 2758 } 2759 2760 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2761 { 2762 Mat_MPIAIJ *b; 2763 PetscErrorCode ierr; 2764 PetscMPIInt size; 2765 2766 PetscFunctionBegin; 2767 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2768 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2769 b = (Mat_MPIAIJ*)B->data; 2770 2771 #if defined(PETSC_USE_CTABLE) 2772 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2773 #else 2774 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2775 #endif 2776 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2777 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2778 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2779 2780 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2781 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2782 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2783 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2784 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2785 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2786 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2787 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2788 2789 if (!B->preallocated) { 2790 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2791 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2792 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2793 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2794 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2795 } 2796 2797 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2798 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2799 B->preallocated = PETSC_TRUE; 2800 B->was_assembled = PETSC_FALSE; 2801 B->assembled = PETSC_FALSE; 2802 PetscFunctionReturn(0); 2803 } 2804 2805 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2806 { 2807 Mat_MPIAIJ *b; 2808 PetscErrorCode ierr; 2809 2810 PetscFunctionBegin; 2811 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2812 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2813 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2814 b = (Mat_MPIAIJ*)B->data; 2815 2816 #if defined(PETSC_USE_CTABLE) 2817 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2818 #else 2819 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2820 #endif 2821 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2822 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2823 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2824 2825 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2826 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2827 B->preallocated = PETSC_TRUE; 2828 B->was_assembled = PETSC_FALSE; 2829 B->assembled = PETSC_FALSE; 2830 PetscFunctionReturn(0); 2831 } 2832 2833 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2834 { 2835 Mat mat; 2836 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2837 PetscErrorCode ierr; 2838 2839 PetscFunctionBegin; 2840 *newmat = 0; 2841 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2842 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2843 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2844 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2845 a = (Mat_MPIAIJ*)mat->data; 2846 2847 mat->factortype = matin->factortype; 2848 mat->assembled = matin->assembled; 2849 mat->insertmode = NOT_SET_VALUES; 2850 mat->preallocated = matin->preallocated; 2851 2852 a->size = oldmat->size; 2853 a->rank = oldmat->rank; 2854 a->donotstash = oldmat->donotstash; 2855 a->roworiented = oldmat->roworiented; 2856 a->rowindices = NULL; 2857 a->rowvalues = NULL; 2858 a->getrowactive = PETSC_FALSE; 2859 2860 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2861 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2862 2863 if (oldmat->colmap) { 2864 #if defined(PETSC_USE_CTABLE) 2865 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2866 #else 2867 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2868 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2869 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2870 #endif 2871 } else a->colmap = NULL; 2872 if (oldmat->garray) { 2873 PetscInt len; 2874 len = oldmat->B->cmap->n; 2875 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2876 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2877 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2878 } else a->garray = NULL; 2879 2880 /* It may happen MatDuplicate is called with a non-assembled matrix 2881 In fact, MatDuplicate only requires the matrix to be preallocated 2882 This may happen inside a DMCreateMatrix_Shell */ 2883 if (oldmat->lvec) { 2884 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2885 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2886 } 2887 if (oldmat->Mvctx) { 2888 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2890 } 2891 if (oldmat->Mvctx_mpi1) { 2892 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2894 } 2895 2896 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2898 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2900 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2901 *newmat = mat; 2902 PetscFunctionReturn(0); 2903 } 2904 2905 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2906 { 2907 PetscBool isbinary, ishdf5; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2912 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2913 /* force binary viewer to load .info file if it has not yet done so */ 2914 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2915 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2916 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2917 if (isbinary) { 2918 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2919 } else if (ishdf5) { 2920 #if defined(PETSC_HAVE_HDF5) 2921 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2922 #else 2923 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2924 #endif 2925 } else { 2926 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2927 } 2928 PetscFunctionReturn(0); 2929 } 2930 2931 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2932 { 2933 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2934 PetscInt *rowidxs,*colidxs; 2935 PetscScalar *matvals; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2940 2941 /* read in matrix header */ 2942 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2943 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2944 M = header[1]; N = header[2]; nz = header[3]; 2945 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2946 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2947 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2948 2949 /* set block sizes from the viewer's .info file */ 2950 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2951 /* set global sizes if not set already */ 2952 if (mat->rmap->N < 0) mat->rmap->N = M; 2953 if (mat->cmap->N < 0) mat->cmap->N = N; 2954 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2955 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2956 2957 /* check if the matrix sizes are correct */ 2958 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2959 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2960 2961 /* read in row lengths and build row indices */ 2962 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2963 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2964 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2965 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2966 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2967 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2968 /* read in column indices and matrix values */ 2969 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2970 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2971 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2972 /* store matrix indices and values */ 2973 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2974 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2975 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2976 PetscFunctionReturn(0); 2977 } 2978 2979 /* Not scalable because of ISAllGather() unless getting all columns. 
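   ISAllGather() concentrates the complete column index set on every process, so each rank pays
   memory proportional to the global number of selected columns.  The stride test below detects
   the one common case where this can be skipped: when every process requests the full column
   range, a local identity stride IS is created instead.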
*/ 2980 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2981 { 2982 PetscErrorCode ierr; 2983 IS iscol_local; 2984 PetscBool isstride; 2985 PetscMPIInt lisstride=0,gisstride; 2986 2987 PetscFunctionBegin; 2988 /* check if we are grabbing all columns*/ 2989 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2990 2991 if (isstride) { 2992 PetscInt start,len,mstart,mlen; 2993 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2994 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2995 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2996 if (mstart == start && mlen-mstart == len) lisstride = 1; 2997 } 2998 2999 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3000 if (gisstride) { 3001 PetscInt N; 3002 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3003 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3004 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3005 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3006 } else { 3007 PetscInt cbs; 3008 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3009 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3010 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3011 } 3012 3013 *isseq = iscol_local; 3014 PetscFunctionReturn(0); 3015 } 3016 3017 /* 3018 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3019 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3020 3021 Input Parameters: 3022 mat - matrix 3023 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3024 i.e., mat->rstart <= isrow[i] < mat->rend 3025 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3026 i.e., mat->cstart <= iscol[i] < mat->cend 3027 Output Parameter: 3028 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3029 iscol_o - sequential column index set for retrieving mat->B 3030 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3031 */ 3032 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3033 { 3034 PetscErrorCode ierr; 3035 Vec x,cmap; 3036 const PetscInt *is_idx; 3037 PetscScalar *xarray,*cmaparray; 3038 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3039 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3040 Mat B=a->B; 3041 Vec lvec=a->lvec,lcmap; 3042 PetscInt i,cstart,cend,Bn=B->cmap->N; 3043 MPI_Comm comm; 3044 VecScatter Mvctx=a->Mvctx; 3045 3046 PetscFunctionBegin; 3047 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3048 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3049 3050 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3051 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3052 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3053 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3054 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3055 3056 /* Get start indices */ 3057 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3058 isstart -= ncols; 3059 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3060 3061 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3062 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3063 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3064 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3065 for (i=0; i<ncols; i++) { 3066 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3067 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3068 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3069 } 3070 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3071 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3072 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3073 3074 /* Get iscol_d */ 3075 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3076 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3077 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3078 3079 /* Get isrow_d */ 3080 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3081 rstart = mat->rmap->rstart; 3082 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3083 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3084 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3085 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3086 3087 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3088 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3089 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3090 3091 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3092 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3093 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3094 3095 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3096 3097 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 3100 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3101 /* off-process column indices */ 3102 count = 0; 3103 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3104 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3105 3106 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3107 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3108 for (i=0; i<Bn; i++) { 3109 if (PetscRealPart(xarray[i]) > -1.0) { 3110 idx[count] = i; /* local column index in off-diagonal part B */ 3111 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3112 count++; 3113 } 3114 } 3115 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3116 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3117 3118 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3119 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3120 3121 ierr = PetscFree(idx);CHKERRQ(ierr); 3122 *garray = cmap1; 3123 3124 ierr = VecDestroy(&x);CHKERRQ(ierr); 3125 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3126 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3127 PetscFunctionReturn(0); 3128 } 3129 3130 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3131 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3132 { 3133 PetscErrorCode ierr; 3134 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3135 Mat M = NULL; 3136 MPI_Comm comm; 3137 IS iscol_d,isrow_d,iscol_o; 3138 Mat Asub = NULL,Bsub = NULL; 3139 PetscInt n; 3140 3141 PetscFunctionBegin; 3142 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3143 3144 if (call == MAT_REUSE_MATRIX) { 3145 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3146 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3147 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3148 3149 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3150 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3151 3152 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3153 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3154 3155 /* Update diagonal and off-diagonal portions of submat */ 3156 asub = (Mat_MPIAIJ*)(*submat)->data; 3157 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3158 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3159 if (n) { 3160 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3161 } 3162 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 3165 } else { /* call == MAT_INITIAL_MATRIX) */ 3166 const PetscInt *garray; 3167 PetscInt BsubN; 3168 3169 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3170 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3171 3172 /* Create local submatrices Asub and Bsub */ 3173 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3174 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3175 3176 /* Create submatrix M */ 3177 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3178 3179 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3180 asub = (Mat_MPIAIJ*)M->data; 3181 3182 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3183 n = asub->B->cmap->N; 3184 if (BsubN > n) { 3185 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3186 const PetscInt *idx; 3187 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3188 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3189 3190 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3191 j = 0; 3192 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3193 for (i=0; i<n; i++) { 3194 if (j >= BsubN) break; 3195 while (subgarray[i] > garray[j]) j++; 3196 3197 if (subgarray[i] == garray[j]) { 3198 idx_new[i] = idx[j++]; 3199 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3200 } 3201 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3202 3203 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3204 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3205 3206 } else if (BsubN < n) { 3207 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be smaller than B's %D",BsubN,asub->B->cmap->N); 3208 } 3209 3210 ierr = PetscFree(garray);CHKERRQ(ierr); 3211 *submat = M; 3212 3213 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3214 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3215 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3216 3217 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3218 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3219 3220 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3221 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3222 } 3223 PetscFunctionReturn(0); 3224 } 3225 3226 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3227 { 3228 PetscErrorCode ierr; 3229 IS iscol_local=NULL,isrow_d; 3230 PetscInt csize; 3231 PetscInt n,i,j,start,end; 3232 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3233 MPI_Comm comm; 3234 3235 PetscFunctionBegin; 3236 /* If isrow has same processor distribution as mat, 3237 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3238 if (call == MAT_REUSE_MATRIX) { 3239 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3240 if (isrow_d) { 3241 sameRowDist = PETSC_TRUE; 3242 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3243 } else { 3244 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3245 if (iscol_local) { 3246 sameRowDist = PETSC_TRUE; 3247 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3248 } 3249 } 3250 } else { 3251 /* Check if isrow has same processor distribution as mat */ 3252 sameDist[0] =
PETSC_FALSE; 3253 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3254 if (!n) { 3255 sameDist[0] = PETSC_TRUE; 3256 } else { 3257 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3258 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3259 if (i >= start && j < end) { 3260 sameDist[0] = PETSC_TRUE; 3261 } 3262 } 3263 3264 /* Check if iscol has same processor distribution as mat */ 3265 sameDist[1] = PETSC_FALSE; 3266 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3267 if (!n) { 3268 sameDist[1] = PETSC_TRUE; 3269 } else { 3270 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3271 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3272 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3273 } 3274 3275 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3276 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3277 sameRowDist = tsameDist[0]; 3278 } 3279 3280 if (sameRowDist) { 3281 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3282 /* isrow and iscol have same processor distribution as mat */ 3283 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3284 PetscFunctionReturn(0); 3285 } else { /* sameRowDist */ 3286 /* isrow has same processor distribution as mat */ 3287 if (call == MAT_INITIAL_MATRIX) { 3288 PetscBool sorted; 3289 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3290 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3291 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3292 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3293 3294 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3295 if (sorted) { 3296 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3297 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3298 PetscFunctionReturn(0); 3299 } 3300 } else { /* call == MAT_REUSE_MATRIX */ 3301 IS iscol_sub; 3302 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3303 if (iscol_sub) { 3304 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3305 PetscFunctionReturn(0); 3306 } 3307 } 3308 } 3309 } 3310 3311 /* General case: iscol -> iscol_local which has global size of iscol */ 3312 if (call == MAT_REUSE_MATRIX) { 3313 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3314 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3315 } else { 3316 if (!iscol_local) { 3317 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3318 } 3319 } 3320 3321 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3322 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3323 3324 if (call == MAT_INITIAL_MATRIX) { 3325 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3326 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3327 } 3328 PetscFunctionReturn(0); 3329 } 3330 3331 /*@C 3332 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3333 and "off-diagonal" part of the matrix in CSR format. 3334 3335 Collective 3336 3337 Input Parameters: 3338 + comm - MPI communicator 3339 . 
A - "diagonal" portion of matrix 3340 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3341 - garray - global index of B columns 3342 3343 Output Parameter: 3344 . mat - the matrix, with input A as its local diagonal matrix 3345 Level: advanced 3346 3347 Notes: 3348 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3349 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3350 3351 .seealso: MatCreateMPIAIJWithSplitArrays() 3352 @*/ 3353 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3354 { 3355 PetscErrorCode ierr; 3356 Mat_MPIAIJ *maij; 3357 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3358 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3359 PetscScalar *oa=b->a; 3360 Mat Bnew; 3361 PetscInt m,n,N; 3362 3363 PetscFunctionBegin; 3364 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3365 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3366 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3367 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3368 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3369 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3370 3371 /* Get global columns of mat */ 3372 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3373 3374 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3375 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3376 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3377 maij = (Mat_MPIAIJ*)(*mat)->data; 3378 3379 (*mat)->preallocated = PETSC_TRUE; 3380 3381 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3382 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3383 3384 /* Set A as diagonal portion of *mat */ 3385 maij->A = A; 3386 3387 nz = oi[m]; 3388 for (i=0; i<nz; i++) { 3389 col = oj[i]; 3390 oj[i] = garray[col]; 3391 } 3392 3393 /* Set Bnew as off-diagonal portion of *mat */ 3394 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3395 bnew = (Mat_SeqAIJ*)Bnew->data; 3396 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3397 maij->B = Bnew; 3398 3399 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N); 3400 3401 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3402 b->free_a = PETSC_FALSE; 3403 b->free_ij = PETSC_FALSE; 3404 ierr = MatDestroy(&B);CHKERRQ(ierr); 3405 3406 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3407 bnew->free_a = PETSC_TRUE; 3408 bnew->free_ij = PETSC_TRUE; 3409 3410 /* condense columns of maij->B */ 3411 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3412 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3413 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3414 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3415 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3420
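/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above (illustrative only; Ad, Ao and
   garray are assumed to have been built by the caller as described in the man page):

      Mat            Ad,Ao,C;
      const PetscInt *garray;
      PetscErrorCode ierr;
      ...
      ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Ad,Ao,garray,&C);CHKERRQ(ierr);

   On return Ad has become the diagonal block of C and Ao has been destroyed, so neither may be
   used afterwards; garray remains owned by the caller and must still be freed with PetscFree().
*/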
3421 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3422 { 3423 PetscErrorCode ierr; 3424 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3425 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3426 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3427 Mat M,Msub,B=a->B; 3428 MatScalar *aa; 3429 Mat_SeqAIJ *aij; 3430 PetscInt *garray = a->garray,*colsub,Ncols; 3431 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3432 IS iscol_sub,iscmap; 3433 const PetscInt *is_idx,*cmap; 3434 PetscBool allcolumns=PETSC_FALSE; 3435 MPI_Comm comm; 3436 3437 PetscFunctionBegin; 3438 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3439 3440 if (call == MAT_REUSE_MATRIX) { 3441 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3442 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3443 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3444 3445 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3446 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3447 3448 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3449 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3450 3451 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3452 3453 } else { /* call == MAT_INITIAL_MATRIX) */ 3454 PetscBool flg; 3455 3456 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3457 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3458 3459 /* (1) iscol -> nonscalable iscol_local */ 3460 /* Check for special case: each processor gets entire matrix columns */ 3461 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3462 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3463 if (allcolumns) { 3464 iscol_sub = iscol_local; 3465 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3466 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3467 3468 } else { 3469 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3470 PetscInt *idx,*cmap1,k; 3471 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3472 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3473 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3474 count = 0; 3475 k = 0; 3476 for (i=0; i<Ncols; i++) { 3477 j = is_idx[i]; 3478 if (j >= cstart && j < cend) { 3479 /* diagonal part of mat */ 3480 idx[count] = j; 3481 cmap1[count++] = i; /* column index in submat */ 3482 } else if (Bn) { 3483 /* off-diagonal part of mat */ 3484 if (j == garray[k]) { 3485 idx[count] = j; 3486 cmap1[count++] = i; /* column index in submat */ 3487 } else if (j > garray[k]) { 3488 while (j > garray[k] && k < Bn-1) k++; 3489 if (j == garray[k]) { 3490 idx[count] = j; 3491 cmap1[count++] = i; /* column index in submat */ 3492 } 3493 } 3494 } 3495 } 3496 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3497 3498 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3499 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3500 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3501 3502 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3503 } 3504 3505 /* (3) Create sequential Msub */ 3506 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3507 } 3508 3509 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3510 aij = (Mat_SeqAIJ*)(Msub)->data; 3511 ii = aij->i; 3512 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3513 3514 /* 3515 m - number of local rows 3516 Ncols - number of columns (same on all processors) 3517 rstart - first row in new global matrix generated 3518 */ 3519 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3520 3521 if (call == MAT_INITIAL_MATRIX) { 3522 /* (4) Create parallel newmat */ 3523 PetscMPIInt rank,size; 3524 PetscInt csize; 3525 3526 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3527 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3528 3529 /* 3530 Determine the number of non-zeros in the diagonal and off-diagonal 3531 portions of the matrix in order to do correct preallocation 3532 */ 3533 3534 /* first get start and end of "diagonal" columns */ 3535 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3536 if (csize == PETSC_DECIDE) { 3537 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3538 if (mglobal == Ncols) { /* square matrix */ 3539 nlocal = m; 3540 } else { 3541 nlocal = Ncols/size + ((Ncols % size) > rank); 3542 } 3543 } else { 3544 nlocal = csize; 3545 } 3546 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3547 rstart = rend - nlocal; 3548 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3549 3550 /* next, compute all the lengths */ 3551 jj = aij->j; 3552 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3553 olens = dlens + m; 3554 for (i=0; i<m; i++) { 3555 jend = ii[i+1] - ii[i]; 3556 olen = 0; 3557 dlen = 0; 3558 for (j=0; j<jend; j++) { 3559 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3560 else dlen++; 3561 jj++; 3562 } 3563 olens[i] = olen; 3564 dlens[i] = dlen; 3565 } 3566 3567 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3568 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3569 3570 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3571 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
3572 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3573 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3574 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3575 ierr = PetscFree(dlens);CHKERRQ(ierr); 3576 3577 } else { /* call == MAT_REUSE_MATRIX */ 3578 M = *newmat; 3579 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3580 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3581 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3582 /* 3583 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3584 rather than the slower MatSetValues(). 3585 */ 3586 M->was_assembled = PETSC_TRUE; 3587 M->assembled = PETSC_FALSE; 3588 } 3589 3590 /* (5) Set values of Msub to *newmat */ 3591 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3592 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3593 3594 jj = aij->j; 3595 aa = aij->a; 3596 for (i=0; i<m; i++) { 3597 row = rstart + i; 3598 nz = ii[i+1] - ii[i]; 3599 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3600 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3601 jj += nz; aa += nz; 3602 } 3603 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3604 3605 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3606 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3607 3608 ierr = PetscFree(colsub);CHKERRQ(ierr); 3609 3610 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3611 if (call == MAT_INITIAL_MATRIX) { 3612 *newmat = M; 3613 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3614 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3615 3616 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3617 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3618 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3620 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3621 3622 if (iscol_local) { 3623 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3624 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3625 } 3626 } 3627 PetscFunctionReturn(0); 3628 } 3629 3630 /* 3631 Not great since it makes two copies of the submatrix, first an SeqAIJ 3632 in local and then by concatenating the local matrices the end result. 3633 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3634 3635 Note: This requires a sequential iscol with all indices. 
3636 */ 3637 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3638 { 3639 PetscErrorCode ierr; 3640 PetscMPIInt rank,size; 3641 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3642 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3643 Mat M,Mreuse; 3644 MatScalar *aa,*vwork; 3645 MPI_Comm comm; 3646 Mat_SeqAIJ *aij; 3647 PetscBool colflag,allcolumns=PETSC_FALSE; 3648 3649 PetscFunctionBegin; 3650 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3651 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3652 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3653 3654 /* Check for special case: each processor gets entire matrix columns */ 3655 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3656 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3657 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3658 3659 if (call == MAT_REUSE_MATRIX) { 3660 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3661 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3662 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3663 } else { 3664 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3665 } 3666 3667 /* 3668 m - number of local rows 3669 n - number of columns (same on all processors) 3670 rstart - first row in new global matrix generated 3671 */ 3672 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3673 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3674 if (call == MAT_INITIAL_MATRIX) { 3675 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3676 ii = aij->i; 3677 jj = aij->j; 3678 3679 /* 3680 Determine the number of non-zeros in the diagonal and off-diagonal 3681 portions of the matrix in order to do correct preallocation 3682 */ 3683 3684 /* first get start and end of "diagonal" columns */ 3685 if (csize == PETSC_DECIDE) { 3686 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3687 if (mglobal == n) { /* square matrix */ 3688 nlocal = m; 3689 } else { 3690 nlocal = n/size + ((n % size) > rank); 3691 } 3692 } else { 3693 nlocal = csize; 3694 } 3695 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3696 rstart = rend - nlocal; 3697 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3698 3699 /* next, compute all the lengths */ 3700 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3701 olens = dlens + m; 3702 for (i=0; i<m; i++) { 3703 jend = ii[i+1] - ii[i]; 3704 olen = 0; 3705 dlen = 0; 3706 for (j=0; j<jend; j++) { 3707 if (*jj < rstart || *jj >= rend) olen++; 3708 else dlen++; 3709 jj++; 3710 } 3711 olens[i] = olen; 3712 dlens[i] = dlen; 3713 } 3714 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3715 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3716 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3717 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3718 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3719 ierr = PetscFree(dlens);CHKERRQ(ierr); 3720 } else { 3721 PetscInt ml,nl; 3722 3723 M = *newmat; 3724 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3725 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3726 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3727 /* 3728 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3729 rather than the slower MatSetValues(). 3730 */ 3731 M->was_assembled = PETSC_TRUE; 3732 M->assembled = PETSC_FALSE; 3733 } 3734 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3735 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3736 ii = aij->i; 3737 jj = aij->j; 3738 aa = aij->a; 3739 for (i=0; i<m; i++) { 3740 row = rstart + i; 3741 nz = ii[i+1] - ii[i]; 3742 cwork = jj; jj += nz; 3743 vwork = aa; aa += nz; 3744 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3745 } 3746 3747 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3748 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3749 *newmat = M; 3750 3751 /* save submatrix used in processor for next request */ 3752 if (call == MAT_INITIAL_MATRIX) { 3753 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3754 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3755 } 3756 PetscFunctionReturn(0); 3757 } 3758 3759 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3760 { 3761 PetscInt m,cstart, cend,j,nnz,i,d; 3762 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3763 const PetscInt *JJ; 3764 PetscErrorCode ierr; 3765 PetscBool nooffprocentries; 3766 3767 PetscFunctionBegin; 3768 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]); 3769 3770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3772 m = B->rmap->n; 3773 cstart = B->cmap->rstart; 3774 cend = B->cmap->rend; 3775 rstart = B->rmap->rstart; 3776 3777 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3778 3779 #if defined(PETSC_USE_DEBUG) 3780 for (i=0; i<m; i++) { 3781 nnz = Ii[i+1]- Ii[i]; 3782 JJ = J + Ii[i]; 3783 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz); 3784 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3785 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3786 } 3787 #endif 3788 3789 for (i=0; i<m; i++) { 3790 nnz = Ii[i+1]- Ii[i]; 3791 JJ = J + Ii[i]; 3792 nnz_max = PetscMax(nnz_max,nnz); 3793 d = 0; 3794 for (j=0; j<nnz; j++) { 3795 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3796 } 3797 d_nnz[i] = d; 3798 o_nnz[i] = nnz - d; 3799 } 3800 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3801 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3802 3803 for (i=0; i<m; i++) { 3804 ii = i + rstart; 3805 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3806 } 3807 nooffprocentries = B->nooffprocentries; 3808 B->nooffprocentries = PETSC_TRUE; 3809 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3810 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3811 B->nooffprocentries = nooffprocentries; 3812 3813 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3814 PetscFunctionReturn(0); 3815 } 3816 3817 /*@ 3818 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3819 (the default parallel PETSc format).
3820 3821 Collective 3822 3823 Input Parameters: 3824 + B - the matrix 3825 . i - the indices into j for the start of each local row (starts with zero) 3826 . j - the column indices for each local row (starts with zero) 3827 - v - optional values in the matrix 3828 3829 Level: developer 3830 3831 Notes: 3832 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3833 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3834 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3835 3836 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3837 3838 The format used for the sparse matrix input is equivalent to a 3839 row-major ordering, i.e., for the following matrix, the input data expected is 3840 as shown 3841 3842 $ 1 0 0 3843 $ 2 0 3 P0 3844 $ ------- 3845 $ 4 5 6 P1 3846 $ 3847 $ Process0 [P0]: rows_owned=[0,1] 3848 $ i = {0,1,3} [size = nrow+1 = 2+1] 3849 $ j = {0,0,2} [size = 3] 3850 $ v = {1,2,3} [size = 3] 3851 $ 3852 $ Process1 [P1]: rows_owned=[2] 3853 $ i = {0,3} [size = nrow+1 = 1+1] 3854 $ j = {0,1,2} [size = 3] 3855 $ v = {4,5,6} [size = 3] 3856 3857 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3858 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3859 @*/ 3860 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3861 { 3862 PetscErrorCode ierr; 3863 3864 PetscFunctionBegin; 3865 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3866 PetscFunctionReturn(0); 3867 } 3868 3869 /*@C 3870 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3871 (the default parallel PETSc format). For good matrix assembly performance 3872 the user should preallocate the matrix storage by setting the parameters 3873 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3874 performance can be increased by more than a factor of 50. 3875 3876 Collective 3877 3878 Input Parameters: 3879 + B - the matrix 3880 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3881 (same value is used for all local rows) 3882 . d_nnz - array containing the number of nonzeros in the various rows of the 3883 DIAGONAL portion of the local submatrix (possibly different for each row) 3884 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3885 The size of this array is equal to the number of local rows, i.e. 'm'. 3886 For matrices that will be factored, you must leave room for (and set) 3887 the diagonal entry even if it is zero. 3888 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3889 submatrix (same value is used for all local rows). 3890 - o_nnz - array containing the number of nonzeros in the various rows of the 3891 OFF-DIAGONAL portion of the local submatrix (possibly different for 3892 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3893 structure. The size of this array is equal to the number 3894 of local rows, i.e. 'm'.
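For example, with the MatCreate()/MatSetType() paradigm the preallocation call is typically made as follows (a sketch; m, n, M, N and the d_nnz/o_nnz arrays are assumed to have been set up by the caller):

$    MatCreate(comm,&B);
$    MatSetSizes(B,m,n,M,N);
$    MatSetType(B,MATMPIAIJ);
$    MatSetFromOptions(B);
$    MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);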
3895 3896 If the *_nnz parameter is given then the *_nz parameter is ignored. 3897 3898 The AIJ format (also called the Yale sparse matrix format or 3899 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3900 storage. The stored row and column indices begin with zero. 3901 See Users-Manual: ch_mat for details. 3902 3903 The parallel matrix is partitioned such that the first m0 rows belong to 3904 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3905 to process 2 etc., where m0,m1,m2... are the input parameter 'm'. 3906 3907 The DIAGONAL portion of the local submatrix of a processor can be defined 3908 as the submatrix which is obtained by extracting the part corresponding to 3909 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3910 first row that belongs to the processor, r2 is the last row belonging to 3911 this processor, and c1-c2 is the range of indices of the local part of a 3912 vector suitable for applying the matrix to. This is an mxn matrix. In the 3913 common case of a square matrix, the row and column ranges are the same and 3914 the DIAGONAL part is also square. The remaining portion of the local 3915 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3916 3917 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3918 3919 You can call MatGetInfo() to get information on how effective the preallocation was; 3920 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3921 You can also run with the option -info and look for messages with the string 3922 malloc in them to see if additional memory allocation was needed. 3923 3924 Example usage: 3925 3926 Consider the following 8x8 matrix with 34 non-zero values, that is 3927 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3928 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3929 as follows: 3930 3931 .vb 3932 1 2 0 | 0 3 0 | 0 4 3933 Proc0 0 5 6 | 7 0 0 | 8 0 3934 9 0 10 | 11 0 0 | 12 0 3935 ------------------------------------- 3936 13 0 14 | 15 16 17 | 0 0 3937 Proc1 0 18 0 | 19 20 21 | 0 0 3938 0 0 0 | 22 23 0 | 24 0 3939 ------------------------------------- 3940 Proc2 25 26 27 | 0 0 28 | 29 0 3941 30 0 0 | 31 32 33 | 0 34 3942 .ve 3943 3944 This can be represented as a collection of submatrices as: 3945 3946 .vb 3947 A B C 3948 D E F 3949 G H I 3950 .ve 3951 3952 Where the submatrices A,B,C are owned by proc0, D,E,F are 3953 owned by proc1, G,H,I are owned by proc2. 3954 3955 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3956 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3957 The 'M','N' parameters are 8,8, and have the same values on all procs. 3958 3959 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3960 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3961 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3962 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3963 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3964 matrix, and [DF] as another SeqAIJ matrix. 3965 3966 When d_nz, o_nz parameters are specified, d_nz storage elements are 3967 allocated for every row of the local diagonal submatrix, and o_nz 3968 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3969 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3970 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
3971 In this case, the values of d_nz,o_nz are: 3972 .vb 3973 proc0 : d_nz = 2, o_nz = 2 3974 proc1 : d_nz = 3, o_nz = 2 3975 proc2 : d_nz = 1, o_nz = 4 3976 .ve 3977 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3978 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3979 for proc2, i.e., we are using 12+15+10=37 storage locations to store 3980 34 values. 3981 3982 When d_nnz, o_nnz parameters are specified, the storage is specified 3983 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3984 In the above case the values for d_nnz,o_nnz are: 3985 .vb 3986 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3987 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3988 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3989 .ve 3990 Here the space allocated is the sum of all the above values, i.e., 34, and 3991 hence pre-allocation is perfect. 3992 3993 Level: intermediate 3994 3995 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3996 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3997 @*/ 3998 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3999 { 4000 PetscErrorCode ierr; 4001 4002 PetscFunctionBegin; 4003 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4004 PetscValidType(B,1); 4005 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4006 PetscFunctionReturn(0); 4007 } 4008 4009 /*@ 4010 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local 4011 rows in standard CSR format. 4012 4013 Collective 4014 4015 Input Parameters: 4016 + comm - MPI communicator 4017 . m - number of local rows (Cannot be PETSC_DECIDE) 4018 . n - This value should be the same as the local size used in creating the 4019 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4020 calculated if N is given) For square matrices n is almost always m. 4021 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4022 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4023 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4024 . j - column indices 4025 - a - matrix values 4026 4027 Output Parameter: 4028 . mat - the matrix 4029 4030 Level: intermediate 4031 4032 Notes: 4033 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4034 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4035 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4036 4037 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4038 4039 The format used for the sparse matrix input is equivalent to a 4040 row-major ordering,
i.e., for the following matrix, the input data expected is 4041 as shown 4042 4043 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4044 4045 $ 1 0 0 4046 $ 2 0 3 P0 4047 $ ------- 4048 $ 4 5 6 P1 4049 $ 4050 $ Process0 [P0]: rows_owned=[0,1] 4051 $ i = {0,1,3} [size = nrow+1 = 2+1] 4052 $ j = {0,0,2} [size = 3] 4053 $ v = {1,2,3} [size = 3] 4054 $ 4055 $ Process1 [P1]: rows_owned=[2] 4056 $ i = {0,3} [size = nrow+1 = 1+1] 4057 $ j = {0,1,2} [size = 3] 4058 $ v = {4,5,6} [size = 3] 4059 4060 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4061 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4062 @*/ 4063 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4064 { 4065 PetscErrorCode ierr; 4066 4067 PetscFunctionBegin; 4068 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4069 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4070 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4071 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4072 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4073 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4074 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4075 PetscFunctionReturn(0); 4076 } 4077 4078 /*@ 4079 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local 4080 rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical 4081 4082 Collective 4083 4084 Input Parameters: 4085 + mat - the matrix 4086 . m - number of local rows (Cannot be PETSC_DECIDE) 4087 . n - This value should be the same as the local size used in creating the 4088 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4089 calculated if N is given) For square matrices n is almost always m. 4090 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4091 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4092 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4093 .
J - column indices 4094 - v - matrix values 4095 4096 Level: intermediate 4097 4098 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4099 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4100 @*/ 4101 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4102 { 4103 PetscErrorCode ierr; 4104 PetscInt cstart,nnz,i,j; 4105 PetscInt *ld; 4106 PetscBool nooffprocentries; 4107 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4108 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4109 PetscScalar *ad = Ad->a, *ao = Ao->a; 4110 const PetscInt *Adi = Ad->i; 4111 PetscInt ldi,Iii,md; 4112 4113 PetscFunctionBegin; 4114 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4115 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4116 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4117 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4118 4119 cstart = mat->cmap->rstart; 4120 if (!Aij->ld) { 4121 /* count number of entries below block diagonal */ 4122 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4123 Aij->ld = ld; 4124 for (i=0; i<m; i++) { 4125 nnz = Ii[i+1]- Ii[i]; 4126 j = 0; 4127 while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so we never read past the end of this row's column indices */ 4128 J += nnz; 4129 ld[i] = j; 4130 } 4131 } else { 4132 ld = Aij->ld; 4133 } 4134 4135 for (i=0; i<m; i++) { 4136 nnz = Ii[i+1]- Ii[i]; 4137 Iii = Ii[i]; 4138 ldi = ld[i]; 4139 md = Adi[i+1]-Adi[i]; 4140 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4141 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4142 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4143 ad += md; 4144 ao += nnz - md; 4145 } 4146 nooffprocentries = mat->nooffprocentries; 4147 mat->nooffprocentries = PETSC_TRUE; 4148 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4149 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4150 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4151 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4152 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4153 mat->nooffprocentries = nooffprocentries; 4154 PetscFunctionReturn(0); 4155 } 4156 4157 /*@C 4158 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4159 (the default parallel PETSc format). For good matrix assembly performance 4160 the user should preallocate the matrix storage by setting the parameters 4161 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4162 performance can be increased by more than a factor of 50. 4163 4164 Collective 4165 4166 Input Parameters: 4167 + comm - MPI communicator 4168 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4169 This value should be the same as the local size used in creating the 4170 y vector for the matrix-vector product y = Ax. 4171 . n - This value should be the same as the local size used in creating the 4172 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4173 calculated if N is given) For square matrices n is almost always m. 4174 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4175 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4176 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4177 (same value is used for all local rows) 4178 . d_nnz - array containing the number of nonzeros in the various rows of the 4179 DIAGONAL portion of the local submatrix (possibly different for each row) 4180 or NULL, if d_nz is used to specify the nonzero structure. 4181 The size of this array is equal to the number of local rows, i.e. 'm'. 4182 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4183 submatrix (same value is used for all local rows). 4184 - o_nnz - array containing the number of nonzeros in the various rows of the 4185 OFF-DIAGONAL portion of the local submatrix (possibly different for 4186 each row) or NULL, if o_nz is used to specify the nonzero 4187 structure. The size of this array is equal to the number 4188 of local rows, i.e. 'm'. 4189 4190 Output Parameter: 4191 . A - the matrix 4192 4193 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4194 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4195 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4196 4197 Notes: 4198 If the *_nnz parameter is given then the *_nz parameter is ignored. 4199 4200 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4201 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4202 storage requirements for this matrix. 4203 4204 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4205 processor then it must be used on all processors that share the object for 4206 that argument. 4207 4208 The user MUST specify either the local or global matrix dimensions 4209 (possibly both). 4210 4211 The parallel matrix is partitioned across processors such that the 4212 first m0 rows belong to process 0, the next m1 rows belong to 4213 process 1, the next m2 rows belong to process 2 etc., where 4214 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4215 values corresponding to an [m x N] submatrix. 4216 4217 The columns are logically partitioned with the n0 columns belonging 4218 to 0th partition, the next n1 columns belonging to the next 4219 partition etc., where n0,n1,n2... are the input parameter 'n'. 4220 4221 The DIAGONAL portion of the local submatrix on any given processor 4222 is the submatrix corresponding to the rows and columns m,n 4223 corresponding to the given processor, i.e., the diagonal matrix on 4224 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4225 etc. The remaining portion of the local submatrix [m x (N-n)] 4226 constitutes the OFF-DIAGONAL portion. The example below better 4227 illustrates this concept. 4228 4229 For a square global matrix we define each processor's diagonal portion 4230 to be its local rows and the corresponding columns (a square submatrix); 4231 each processor's off-diagonal portion encompasses the remainder of the 4232 local matrix (a rectangular submatrix). 4233 4234 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4235 4236 When calling this routine with a single process communicator, a matrix of 4237 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4238 type of communicator, use the construction mechanism 4242 4243 $ MatCreate(...,&A); 4244 $ MatSetType(A,MATMPIAIJ); 4245 $ MatSetSizes(A, m,n,M,N); 4246 $ MatMPIAIJSetPreallocation(A,...); 4247 4248 By default, this format uses inodes (identical nodes) when possible. 4249 We search for consecutive rows with the same nonzero structure, thereby 4250 reusing matrix information to achieve increased efficiency. 4251 4252 Options Database Keys: 4253 + -mat_no_inode - Do not use inodes 4254 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4255 4256 4257 4258 Example usage: 4259 4260 Consider the following 8x8 matrix with 34 non-zero values, that is 4261 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4262 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4263 as follows: 4264 4265 .vb 4266 1 2 0 | 0 3 0 | 0 4 4267 Proc0 0 5 6 | 7 0 0 | 8 0 4268 9 0 10 | 11 0 0 | 12 0 4269 ------------------------------------- 4270 13 0 14 | 15 16 17 | 0 0 4271 Proc1 0 18 0 | 19 20 21 | 0 0 4272 0 0 0 | 22 23 0 | 24 0 4273 ------------------------------------- 4274 Proc2 25 26 27 | 0 0 28 | 29 0 4275 30 0 0 | 31 32 33 | 0 34 4276 .ve 4277 4278 This can be represented as a collection of submatrices as: 4279 4280 .vb 4281 A B C 4282 D E F 4283 G H I 4284 .ve 4285 4286 Where the submatrices A,B,C are owned by proc0, D,E,F are 4287 owned by proc1, G,H,I are owned by proc2. 4288 4289 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4290 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4291 The 'M','N' parameters are 8,8, and have the same values on all procs. 4292 4293 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4294 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4295 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4296 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4297 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4298 matrix, and [DF] as another SeqAIJ matrix. 4299 4300 When d_nz, o_nz parameters are specified, d_nz storage elements are 4301 allocated for every row of the local diagonal submatrix, and o_nz 4302 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4303 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 4304 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4305 In this case, the values of d_nz,o_nz are 4306 .vb 4307 proc0 : d_nz = 2, o_nz = 2 4308 proc1 : d_nz = 3, o_nz = 2 4309 proc2 : d_nz = 1, o_nz = 4 4310 .ve 4311 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4312 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4313 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4314 34 values. 4315 4316 When d_nnz, o_nnz parameters are specified, the storage is specified 4317 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4318 In the above case the values for d_nnz,o_nnz are 4319 .vb 4320 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4321 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4322 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4323 .ve 4324 Here the space allocated is the sum of all the above values, i.e., 34, and 4325 hence pre-allocation is perfect.
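A sketch of the corresponding call on each process for the example above (m and n are the local sizes 3,3,2 listed above and d_nnz/o_nnz the per-process arrays just shown; the names are illustrative):

$    Mat A;
$    MatCreateAIJ(PETSC_COMM_WORLD,m,n,8,8,0,d_nnz,0,o_nnz,&A);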
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not collective

  Input Parameter:
. A - The MPIAIJ matrix

  Output Parameters:
+ Ad - The local diagonal block as a SeqAIJ matrix
. Ao - The local off-diagonal block as a SeqAIJ matrix
- colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

  Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
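  A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix):
.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
    /* local column j of Ao corresponds to global column colmap[j] of A */
.ve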
4368 4369 Level: intermediate 4370 4371 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4372 @*/ 4373 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4374 { 4375 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4376 PetscBool flg; 4377 PetscErrorCode ierr; 4378 4379 PetscFunctionBegin; 4380 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4381 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4382 if (Ad) *Ad = a->A; 4383 if (Ao) *Ao = a->B; 4384 if (colmap) *colmap = a->garray; 4385 PetscFunctionReturn(0); 4386 } 4387 4388 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4389 { 4390 PetscErrorCode ierr; 4391 PetscInt m,N,i,rstart,nnz,Ii; 4392 PetscInt *indx; 4393 PetscScalar *values; 4394 4395 PetscFunctionBegin; 4396 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4397 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4398 PetscInt *dnz,*onz,sum,bs,cbs; 4399 4400 if (n == PETSC_DECIDE) { 4401 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4402 } 4403 /* Check sum(n) = N */ 4404 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4405 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4406 4407 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4408 rstart -= m; 4409 4410 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4411 for (i=0; i<m; i++) { 4412 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4413 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4414 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4415 } 4416 4417 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4418 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4419 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4420 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4421 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4422 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4423 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4424 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4425 } 4426 4427 /* numeric phase */ 4428 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4429 for (i=0; i<m; i++) { 4430 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4431 Ii = i + rstart; 4432 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4433 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4434 } 4435 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4436 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4437 PetscFunctionReturn(0); 4438 } 4439 4440 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4441 { 4442 PetscErrorCode ierr; 4443 PetscMPIInt rank; 4444 PetscInt m,N,i,rstart,nnz; 4445 size_t len; 4446 const PetscInt *indx; 4447 PetscViewer out; 4448 char *name; 4449 Mat B; 4450 const PetscScalar *values; 4451 4452 PetscFunctionBegin; 4453 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4454 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4455 /* Should this be the type of the diagonal block of A? 
*/ 4456 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4457 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4458 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4459 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4460 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4461 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4462 for (i=0; i<m; i++) { 4463 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4464 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4465 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4466 } 4467 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4468 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4469 4470 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4471 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4472 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4473 sprintf(name,"%s.%d",outfile,rank); 4474 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4475 ierr = PetscFree(name);CHKERRQ(ierr); 4476 ierr = MatView(B,out);CHKERRQ(ierr); 4477 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4478 ierr = MatDestroy(&B);CHKERRQ(ierr); 4479 PetscFunctionReturn(0); 4480 } 4481 4482 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4483 { 4484 PetscErrorCode ierr; 4485 Mat_Merge_SeqsToMPI *merge; 4486 PetscContainer container; 4487 4488 PetscFunctionBegin; 4489 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4490 if (container) { 4491 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4492 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4504 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4505 ierr = PetscFree(merge);CHKERRQ(ierr); 4506 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4507 } 4508 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #include <../src/mat/utils/freespace.h> 4513 #include <petscbt.h> 4514 4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4516 { 4517 PetscErrorCode ierr; 4518 MPI_Comm comm; 4519 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4520 PetscMPIInt size,rank,taga,*len_s; 4521 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4522 PetscInt proc,m; 4523 PetscInt **buf_ri,**buf_rj; 4524 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4525 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4526 MPI_Request *s_waits,*r_waits; 4527 MPI_Status *status; 4528 MatScalar *aa=a->a; 4529 MatScalar **abuf_r,*ba_i; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4535 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4536 4537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4539 4540 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4541 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4542 4543 bi = merge->bi; 4544 bj = merge->bj; 4545 buf_ri = merge->buf_ri; 4546 buf_rj = merge->buf_rj; 4547 4548 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4549 owners = merge->rowmap->range; 4550 len_s = merge->len_s; 4551 4552 /* send and recv matrix values */ 4553 /*-----------------------------*/ 4554 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4555 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4556 4557 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4558 for (proc=0,k=0; proc<size; proc++) { 4559 if (!len_s[proc]) continue; 4560 i = owners[proc]; 4561 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4562 k++; 4563 } 4564 4565 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4566 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4567 ierr = PetscFree(status);CHKERRQ(ierr); 4568 4569 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4570 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4571 4572 /* insert mat values of mpimat */ 4573 /*----------------------------*/ 4574 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4575 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4576 4577 for (k=0; k<merge->nrecv; k++) { 4578 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4579 nrows = *(buf_ri_k[k]); 4580 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4581 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4582 } 4583 4584 /* set values of ba */ 4585 m = merge->rowmap->n; 4586 for (i=0; i<m; i++) { 4587 arow = owners[rank] + i; 4588 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4589 bnzi = bi[i+1] - bi[i]; 4590 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4591 4592 /* add local non-zero vals of this proc's seqmat into ba */ 4593 anzi = ai[arow+1] - ai[arow]; 4594 aj = a->j + ai[arow]; 4595 aa = a->a + ai[arow]; 4596 nextaj = 0; 4597 for (j=0; nextaj<anzi; j++) { 4598 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4599 ba_i[j] += aa[nextaj++]; 4600 } 4601 } 4602 4603 /* add received vals into ba */ 4604 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4605 /* i-th row */ 4606 if (i == *nextrow[k]) { 4607 anzi = *(nextai[k]+1) - *nextai[k]; 4608 aj = buf_rj[k] + *(nextai[k]); 4609 aa = abuf_r[k] + *(nextai[k]); 4610 nextaj = 0; 4611 for (j=0; nextaj<anzi; j++) { 4612 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4613 ba_i[j] += aa[nextaj++]; 4614 } 4615 } 4616 nextrow[k]++; nextai[k]++; 4617 } 4618 } 4619 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4620 } 4621 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4622 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4623 4624 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4625 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4626 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4627 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4628 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4629 PetscFunctionReturn(0); 4630 } 4631 4632 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4633 { 4634 PetscErrorCode ierr; 4635 Mat B_mpi; 4636 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4637 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4638 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4639 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4640 PetscInt len,proc,*dnz,*onz,bs,cbs; 4641 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4642 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4643 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4644 MPI_Status *status; 4645 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4646 PetscBT lnkbt; 4647 Mat_Merge_SeqsToMPI *merge; 4648 PetscContainer container; 4649 4650 PetscFunctionBegin; 4651 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4652 4653 /* make sure it is a PETSc comm */ 4654 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4656 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4657 4658 ierr = PetscNew(&merge);CHKERRQ(ierr); 4659 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4660 4661 /* determine row ownership */ 4662 /*---------------------------------------------------------*/ 4663 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4669 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4670 4671 m = merge->rowmap->n; 4672 owners = merge->rowmap->range; 4673 4674 /* determine the number of messages to send, their lengths */ 4675 /*---------------------------------------------------------*/ 4676 len_s = merge->len_s; 4677 4678 len = 0; /* length of buf_si[] */ 4679 merge->nsend = 0; 4680 for (proc=0; proc<size; proc++) { 4681 len_si[proc] = 0; 4682 if (proc == rank) { 4683 len_s[proc] = 0; 4684 } else { 4685 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4686 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4687 } 4688 if (len_s[proc]) { 4689 merge->nsend++; 4690 nrows = 0; 4691 for (i=owners[proc]; i<owners[proc+1]; i++) { 4692 if (ai[i+1] > ai[i]) nrows++; 4693 } 4694 len_si[proc] = 2*(nrows+1); 4695 len += len_si[proc]; 4696 } 4697 } 4698 4699 /* determine the number and length of messages to receive for ij-structure */ 4700 /*-------------------------------------------------------------------------*/ 4701 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4702 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4703 4704 /* post the Irecv of j-structure */ 4705 /*-------------------------------*/ 4706 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4707 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4708 4709 /* post the Isend of j-structure */ 4710 /*--------------------------------*/ 4711 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4712 4713 for (proc=0, k=0; proc<size; proc++) { 4714 if (!len_s[proc]) continue; 4715 i = owners[proc]; 4716 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4717 k++; 4718 } 4719 4720 /* receives and sends 
of j-structure are complete */ 4721 /*------------------------------------------------*/ 4722 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4723 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4724 4725 /* send and recv i-structure */ 4726 /*---------------------------*/ 4727 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4728 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4729 4730 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4731 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4732 for (proc=0,k=0; proc<size; proc++) { 4733 if (!len_s[proc]) continue; 4734 /* form outgoing message for i-structure: 4735 buf_si[0]: nrows to be sent 4736 [1:nrows]: row index (global) 4737 [nrows+1:2*nrows+1]: i-structure index 4738 */ 4739 /*-------------------------------------------*/ 4740 nrows = len_si[proc]/2 - 1; 4741 buf_si_i = buf_si + nrows+1; 4742 buf_si[0] = nrows; 4743 buf_si_i[0] = 0; 4744 nrows = 0; 4745 for (i=owners[proc]; i<owners[proc+1]; i++) { 4746 anzi = ai[i+1] - ai[i]; 4747 if (anzi) { 4748 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4749 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4750 nrows++; 4751 } 4752 } 4753 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4754 k++; 4755 buf_si += len_si[proc]; 4756 } 4757 4758 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4759 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4760 4761 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4762 for (i=0; i<merge->nrecv; i++) { 4763 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4764 } 4765 4766 ierr = PetscFree(len_si);CHKERRQ(ierr); 4767 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4768 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4769 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4770 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4771 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4772 ierr = PetscFree(status);CHKERRQ(ierr); 4773 4774 /* compute a local seq matrix in each processor */ 4775 /*----------------------------------------------*/ 4776 /* allocate bi array and free space for accumulating nonzero column info */ 4777 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4778 bi[0] = 0; 4779 4780 /* create and initialize a linked list */ 4781 nlnk = N+1; 4782 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4783 4784 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4785 len = ai[owners[rank+1]] - ai[owners[rank]]; 4786 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4787 4788 current_space = free_space; 4789 4790 /* determine symbolic info for each local row */ 4791 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4792 4793 for (k=0; k<merge->nrecv; k++) { 4794 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4795 nrows = *buf_ri_k[k]; 4796 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4797 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4798 } 4799 4800 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4801 len = 0; 4802 for (i=0; i<m; i++) { 4803 bnzi = 0; 4804 /* add local 
non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
  MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm - the communicator the parallel matrix will live on
. seqmat - the input sequential matrices
. m - number of local rows (or PETSC_DECIDE)
. n - number of local columns (or PETSC_DECIDE)
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Notes:
  The dimensions of the sequential matrix on each processor MUST be the same.
  The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
  with MatGetSize()

  Not Collective

  Input Parameters:
+ A - the matrix
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
  If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
  This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
  modify the values of the returned A_loc.
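  A typical calling sequence is sketched below (assuming A is an assembled MATMPIAIJ matrix);
  the first call creates A_loc, a later call with MAT_REUSE_MATRIX refreshes its values:
.vb
    Mat A_loc;
    MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
    /* ... change entries of A and reassemble ... */
    MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
    MatDestroy(&A_loc);
.ve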
4953 4954 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4955 4956 @*/ 4957 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4958 { 4959 PetscErrorCode ierr; 4960 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4961 Mat_SeqAIJ *mat,*a,*b; 4962 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4963 MatScalar *aa,*ba,*cam; 4964 PetscScalar *ca; 4965 PetscMPIInt size; 4966 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4967 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4968 PetscBool match; 4969 4970 PetscFunctionBegin; 4971 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4972 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4973 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4974 if (size == 1) { 4975 if (scall == MAT_INITIAL_MATRIX) { 4976 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4977 *A_loc = mpimat->A; 4978 } else if (scall == MAT_REUSE_MATRIX) { 4979 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4980 } 4981 PetscFunctionReturn(0); 4982 } 4983 4984 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4985 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4986 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4987 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4988 aa = a->a; ba = b->a; 4989 if (scall == MAT_INITIAL_MATRIX) { 4990 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4991 ci[0] = 0; 4992 for (i=0; i<am; i++) { 4993 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4994 } 4995 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4996 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4997 k = 0; 4998 for (i=0; i<am; i++) { 4999 ncols_o = bi[i+1] - bi[i]; 5000 ncols_d = ai[i+1] - ai[i]; 5001 /* off-diagonal portion of A */ 5002 for (jo=0; jo<ncols_o; jo++) { 5003 col = cmap[*bj]; 5004 if (col >= cstart) break; 5005 cj[k] = col; bj++; 5006 ca[k++] = *ba++; 5007 } 5008 /* diagonal portion of A */ 5009 for (j=0; j<ncols_d; j++) { 5010 cj[k] = cstart + *aj++; 5011 ca[k++] = *aa++; 5012 } 5013 /* off-diagonal portion of A */ 5014 for (j=jo; j<ncols_o; j++) { 5015 cj[k] = cmap[*bj++]; 5016 ca[k++] = *ba++; 5017 } 5018 } 5019 /* put together the new matrix */ 5020 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5021 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5022 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5023 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5024 mat->free_a = PETSC_TRUE; 5025 mat->free_ij = PETSC_TRUE; 5026 mat->nonew = 0; 5027 } else if (scall == MAT_REUSE_MATRIX) { 5028 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5029 ci = mat->i; cj = mat->j; cam = mat->a; 5030 for (i=0; i<am; i++) { 5031 /* off-diagonal portion of A */ 5032 ncols_o = bi[i+1] - bi[i]; 5033 for (jo=0; jo<ncols_o; jo++) { 5034 col = cmap[*bj]; 5035 if (col >= cstart) break; 5036 *cam++ = *ba++; bj++; 5037 } 5038 /* diagonal portion of A */ 5039 ncols_d = ai[i+1] - ai[i]; 5040 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5041 /* off-diagonal portion of A */ 5042 for (j=jo; j<ncols_o; j++) { 5043 *cam++ = *ba++; bj++; 5044 } 5045 } 5046 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5047 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5048 PetscFunctionReturn(0); 5049 } 5050 5051 /*@C 5052 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5053 5054 Not Collective 5055 5056 Input Parameters: 5057 + A - the matrix 5058 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5059 - row, col - index sets of rows and columns to extract (or NULL) 5060 5061 Output Parameter: 5062 . A_loc - the local sequential matrix generated 5063 5064 Level: developer 5065 5066 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5067 5068 @*/ 5069 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5070 { 5071 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5072 PetscErrorCode ierr; 5073 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5074 IS isrowa,iscola; 5075 Mat *aloc; 5076 PetscBool match; 5077 5078 PetscFunctionBegin; 5079 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5080 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5081 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5082 if (!row) { 5083 start = A->rmap->rstart; end = A->rmap->rend; 5084 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5085 } else { 5086 isrowa = *row; 5087 } 5088 if (!col) { 5089 start = A->cmap->rstart; 5090 cmap = a->garray; 5091 nzA = a->A->cmap->n; 5092 nzB = a->B->cmap->n; 5093 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5094 ncols = 0; 5095 for (i=0; i<nzB; i++) { 5096 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5097 else break; 5098 } 5099 imark = i; 5100 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5101 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5102 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5103 } else { 5104 iscola = *col; 5105 } 5106 if (scall != MAT_INITIAL_MATRIX) { 5107 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5108 aloc[0] = *A_loc; 5109 } 5110 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5111 if (!col) { /* attach global id of condensed columns */ 5112 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5113 } 5114 *A_loc = aloc[0]; 5115 ierr = PetscFree(aloc);CHKERRQ(ierr); 5116 if (!row) { 5117 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5118 } 5119 if (!col) { 5120 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5121 } 5122 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5123 PetscFunctionReturn(0); 5124 } 5125 5126 /* 5127 * Destroy a mat that may be 
composed with PetscSF communication objects. 5128 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5129 * */ 5130 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5131 { 5132 PetscSF sf,osf; 5133 IS map; 5134 PetscErrorCode ierr; 5135 5136 PetscFunctionBegin; 5137 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5138 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5139 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5140 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5141 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5142 ierr = ISDestroy(&map);CHKERRQ(ierr); 5143 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5144 PetscFunctionReturn(0); 5145 } 5146 5147 /* 5148 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5149 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5150 * on a global size. 5151 * */ 5152 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5153 { 5154 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5155 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5156 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5157 PetscMPIInt owner; 5158 PetscSFNode *iremote,*oiremote; 5159 const PetscInt *lrowindices; 5160 PetscErrorCode ierr; 5161 PetscSF sf,osf; 5162 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5163 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5164 MPI_Comm comm; 5165 ISLocalToGlobalMapping mapping; 5166 5167 PetscFunctionBegin; 5168 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5169 /* plocalsize is the number of roots 5170 * nrows is the number of leaves 5171 * */ 5172 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5173 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5174 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5175 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5176 for (i=0;i<nrows;i++) { 5177 /* Find a remote index and an owner for a row 5178 * The row could be local or remote 5179 * */ 5180 owner = 0; 5181 lidx = 0; 5182 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5183 iremote[i].index = lidx; 5184 iremote[i].rank = owner; 5185 } 5186 /* Create SF to communicate how many nonzero columns for each row */ 5187 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5188 /* SF will figure out the number of nonzero colunms for each row, and their 5189 * offsets 5190 * */ 5191 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5192 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5193 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5194 5195 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5196 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5197 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5198 roffsets[0] = 0; 5199 roffsets[1] = 0; 5200 for (i=0;i<plocalsize;i++) { 5201 /* diag */ 5202 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5203 /* off diag */ 5204 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5205 /* compute offsets so that we relative location for each row */ 5206 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5207 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5208 } 5209 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5210 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5211 /* 'r' 
means root, and 'l' means leaf */ 5212 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5213 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5214 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5215 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5216 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5217 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5218 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5219 dntotalcols = 0; 5220 ontotalcols = 0; 5221 ncol = 0; 5222 for (i=0;i<nrows;i++) { 5223 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5224 ncol = PetscMax(pnnz[i],ncol); 5225 /* diag */ 5226 dntotalcols += nlcols[i*2+0]; 5227 /* off diag */ 5228 ontotalcols += nlcols[i*2+1]; 5229 } 5230 /* We do not need to figure the right number of columns 5231 * since all the calculations will be done by going through the raw data 5232 * */ 5233 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5234 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5235 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5236 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5237 /* diag */ 5238 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5239 /* off diag */ 5240 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5241 /* diag */ 5242 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5243 /* off diag */ 5244 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5245 dntotalcols = 0; 5246 ontotalcols = 0; 5247 ntotalcols = 0; 5248 for (i=0;i<nrows;i++) { 5249 owner = 0; 5250 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5251 /* Set iremote for diag matrix */ 5252 for (j=0;j<nlcols[i*2+0];j++) { 5253 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5254 iremote[dntotalcols].rank = owner; 5255 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5256 ilocal[dntotalcols++] = ntotalcols++; 5257 } 5258 /* off diag */ 5259 for (j=0;j<nlcols[i*2+1];j++) { 5260 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5261 oiremote[ontotalcols].rank = owner; 5262 oilocal[ontotalcols++] = ntotalcols++; 5263 } 5264 } 5265 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5266 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5267 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5268 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5269 /* P serves as roots and P_oth is leaves 5270 * Diag matrix 5271 * */ 5272 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5273 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5274 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5275 5276 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5277 /* Off diag */ 5278 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5279 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5280 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5281 /* We operate on the matrix internal data for saving memory */ 5282 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5283 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5284 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5285 /* Convert to global indices for diag matrix */ 5286 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5287 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5288 /* We want P_oth store global indices */ 5289 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5290 /* Use memory scalable approach */ 5291 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5292 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5293 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5294 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5295 /* Convert back to local indices */ 5296 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5297 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5298 nout = 0; 5299 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5300 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5301 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5302 /* Exchange values */ 5303 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5304 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5305 /* Stop PETSc from shrinking memory */ 5306 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5307 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5308 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5309 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5310 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5311 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5312 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5313 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5314 PetscFunctionReturn(0); 5315 } 5316 5317 /* 5318 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5319 * This supports MPIAIJ and MAIJ 5320 * */ 5321 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5322 { 5323 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5324 Mat_SeqAIJ *p_oth; 5325 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5326 IS rows,map; 5327 PetscHMapI hamp; 5328 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5329 MPI_Comm comm; 5330 PetscSF sf,osf; 5331 PetscBool has; 5332 PetscErrorCode ierr; 5333 5334 PetscFunctionBegin; 5335 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5336 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5337 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5338 * and then create a submatrix (that often is an overlapping matrix) 5339 * */ 5340 if (reuse==MAT_INITIAL_MATRIX) { 5341 /* Use a hash table to figure out unique keys */ 5342 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5343 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5344 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5345 count = 0; 5346 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5347 for (i=0;i<a->B->cmap->n;i++) { 5348 key = a->garray[i]/dof; 5349 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5350 if (!has) { 5351 mapping[i] = count; 5352 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5353 } else { 5354 /* Current 'i' has the same value the previous step */ 5355 mapping[i] = count-1; 5356 } 5357 } 5358 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5359 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5360 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5361 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5362 off = 0; 5363 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5364 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5365 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5366 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5367 /* In case, the matrix was already created but users want to recreate the matrix */ 5368 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5369 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5370 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5371 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5372 } else if (reuse==MAT_REUSE_MATRIX) { 5373 /* If matrix was already created, we simply update values using SF objects 5374 * that as attached to the matrix ealier. 5375 * */ 5376 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5377 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5378 if (!sf || !osf) { 5379 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5380 } 5381 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5382 /* Update values in place */ 5383 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5384 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5385 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5386 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5387 } else { 5388 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5389 } 5390 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5391 PetscFunctionReturn(0); 5392 } 5393 5394 /*@C 5395 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5396 5397 Collective on Mat 5398 5399 Input Parameters: 5400 + A,B - the matrices in mpiaij format 5401 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5402 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5403 5404 Output Parameter: 5405 + rowb, colb - index sets of rows and columns of B to extract 5406 - B_seq - the sequential matrix generated 5407 5408 Level: developer 5409 5410 @*/ 5411 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5412 { 5413 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5414 PetscErrorCode ierr; 5415 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5416 IS isrowb,iscolb; 5417 Mat *bseq=NULL; 5418 5419 PetscFunctionBegin; 5420 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5421 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5422 } 5423 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5424 5425 if (scall == MAT_INITIAL_MATRIX) { 5426 start = A->cmap->rstart; 5427 cmap = a->garray; 5428 nzA = a->A->cmap->n; 5429 nzB = a->B->cmap->n; 5430 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5431 ncols = 0; 5432 for (i=0; i<nzB; i++) { /* row < local row index */ 5433 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5434 else break; 5435 } 5436 imark = i; 5437 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5438 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5439 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5440 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5441 } else { 5442 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5443 isrowb = *rowb; iscolb = *colb; 5444 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5445 bseq[0] = *B_seq; 5446 } 5447 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5448 *B_seq = bseq[0]; 5449 ierr = PetscFree(bseq);CHKERRQ(ierr); 5450 if (!rowb) { 5451 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5452 } else { 5453 *rowb = isrowb; 5454 } 5455 if (!colb) { 5456 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5457 } else { 5458 *colb = iscolb; 5459 } 5460 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5461 PetscFunctionReturn(0); 5462 } 5463 5464 /* 5465 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5466 of the OFF-DIAGONAL portion of local A 5467 5468 Collective on Mat 5469 5470 Input Parameters: 5471 + A,B - the matrices in mpiaij format 5472 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5473 5474 Output Parameter: 5475 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5476 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5477 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5478 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5479 5480 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5481 for this matrix. This is not desirable.. 
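   A sketch of the intended calling sequence (the reuse arrays returned by the first
   call are passed back in on later calls):

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
     ...
     MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);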
5482 5483 Level: developer 5484 5485 */ 5486 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5487 { 5488 PetscErrorCode ierr; 5489 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5490 Mat_SeqAIJ *b_oth; 5491 VecScatter ctx; 5492 MPI_Comm comm; 5493 const PetscMPIInt *rprocs,*sprocs; 5494 const PetscInt *srow,*rstarts,*sstarts; 5495 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5496 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5497 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5498 MPI_Request *rwaits = NULL,*swaits = NULL; 5499 MPI_Status rstatus; 5500 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5501 5502 PetscFunctionBegin; 5503 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5504 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5505 5506 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5507 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5508 } 5509 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5510 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5511 5512 if (size == 1) { 5513 startsj_s = NULL; 5514 bufa_ptr = NULL; 5515 *B_oth = NULL; 5516 PetscFunctionReturn(0); 5517 } 5518 5519 ctx = a->Mvctx; 5520 tag = ((PetscObject)ctx)->tag; 5521 5522 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5523 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5524 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5525 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5526 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5527 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5528 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5529 5530 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5531 if (scall == MAT_INITIAL_MATRIX) { 5532 /* i-array */ 5533 /*---------*/ 5534 /* post receives */ 5535 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5536 for (i=0; i<nrecvs; i++) { 5537 rowlen = rvalues + rstarts[i]*rbs; 5538 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5539 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5540 } 5541 5542 /* pack the outgoing message */ 5543 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5544 5545 sstartsj[0] = 0; 5546 rstartsj[0] = 0; 5547 len = 0; /* total length of j or a array to be sent */ 5548 if (nsends) { 5549 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5550 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5551 } 5552 for (i=0; i<nsends; i++) { 5553 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5554 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5555 for (j=0; j<nrows; j++) { 5556 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5557 for (l=0; l<sbs; l++) { 5558 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5559 5560 rowlen[j*sbs+l] = ncols; 5561 5562 len += ncols; 5563 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5564 } 5565 k++; 5566 } 5567 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5568 5569 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5570 } 5571 /* recvs and sends of i-array are completed */ 5572 i = nrecvs; 5573 while (i--) { 5574 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5575 } 5576 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5577 ierr = PetscFree(svalues);CHKERRQ(ierr); 5578 5579 /* allocate buffers for sending j and a arrays */ 5580 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5581 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5582 5583 /* create i-array of B_oth */ 5584 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5585 5586 b_othi[0] = 0; 5587 len = 0; /* total length of j or a array to be received */ 5588 k = 0; 5589 for (i=0; i<nrecvs; i++) { 5590 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5591 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5592 for (j=0; j<nrows; j++) { 5593 b_othi[k+1] = b_othi[k] + rowlen[j]; 5594 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5595 k++; 5596 } 5597 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5598 } 5599 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5600 5601 /* allocate space for j and a arrrays of B_oth */ 5602 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5603 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5604 5605 /* j-array */ 5606 /*---------*/ 5607 /* post receives of j-array */ 5608 for (i=0; i<nrecvs; i++) { 5609 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5610 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5611 } 5612 5613 /* pack the outgoing message j-array */ 5614 if (nsends) k = sstarts[0]; 5615 for (i=0; i<nsends; i++) { 5616 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5617 bufJ = bufj+sstartsj[i]; 5618 for (j=0; j<nrows; j++) { 5619 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5620 for (ll=0; ll<sbs; ll++) { 5621 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5622 for (l=0; l<ncols; l++) { 5623 *bufJ++ = cols[l]; 5624 } 5625 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5626 } 5627 } 5628 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5629 } 5630 5631 /* recvs and sends of j-array are completed */ 5632 i = nrecvs; 5633 while (i--) { 5634 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5635 } 5636 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5637 } else if (scall == MAT_REUSE_MATRIX) { 5638 sstartsj = *startsj_s; 5639 rstartsj = *startsj_r; 5640 bufa = *bufa_ptr; 5641 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5642 b_otha = b_oth->a; 5643 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5644 5645 /* a-array */ 5646 /*---------*/ 5647 /* post receives of a-array */ 5648 for (i=0; i<nrecvs; i++) { 5649 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5650 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5651 } 5652 5653 /* pack the outgoing message a-array */ 5654 if (nsends) k = sstarts[0]; 5655 for (i=0; i<nsends; i++) { 5656 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5657 bufA = bufa+sstartsj[i]; 5658 for (j=0; j<nrows; j++) { 5659 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5660 for (ll=0; ll<sbs; ll++) { 5661 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5662 for (l=0; l<ncols; l++) { 5663 *bufA++ = vals[l]; 5664 } 5665 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5666 } 5667 } 5668 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5669 } 5670 /* recvs and sends of a-array are completed */ 5671 i = nrecvs; 5672 while (i--) { 5673 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5674 } 5675 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5676 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5677 5678 if (scall == MAT_INITIAL_MATRIX) { 5679 /* put together the new matrix */ 5680 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5681 5682 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5683 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5684 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5685 b_oth->free_a = PETSC_TRUE; 5686 b_oth->free_ij = PETSC_TRUE; 5687 b_oth->nonew = 0; 5688 5689 ierr = PetscFree(bufj);CHKERRQ(ierr); 5690 if (!startsj_s || !bufa_ptr) { 5691 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5692 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5693 } else { 5694 *startsj_s = sstartsj; 5695 *startsj_r = rstartsj; 5696 *bufa_ptr = bufa; 5697 } 5698 } 5699 5700 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5701 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5702 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 /*@C 5707 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5708 5709 Not Collective 5710 5711 Input Parameters: 5712 . A - The matrix in mpiaij format 5713 5714 Output Parameter: 5715 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5716 . 
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing A*B directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (       B      )   =   m (         C        )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
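
/*
   Usage sketch (illustrative only, not part of the library source; the matrix names are placeholders):
   the two routines above are the symbolic and numeric kernels behind C = A*B when A is MATMPIDENSE and
   B is MATMPIAIJ. A caller normally reaches them through the generic product interface, which dispatches
   to these kernels:

     Mat A,B,C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/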

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported");
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
   In this case no space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data       = (void*)b;
  ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The i and j indices are 0 based.

   See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix.

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
   communication if it is known that only local entries will be set (see the sketch following this manual page).

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
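/*
   Sketch of the assembly path recommended in the Notes above (illustrative only; the matrix name A and
   the per-row preallocation estimates of 5 diagonal / 2 off-diagonal nonzeros are placeholders, not taken
   from this source file). Setting MAT_NO_OFF_PROC_ENTRIES before the MatSetValues() loop skips all
   assembly communication when only locally owned rows are set:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ... loop over locally owned rows calling MatSetValues() ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/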
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool   roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat         A                 = aij->A;
    Mat_SeqAIJ  *a                = (Mat_SeqAIJ*)A->data;
    PetscInt    *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar   *aa               = a->a;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B                 = aij->B;
    Mat_SeqAIJ  *b                = (Mat_SeqAIJ*)B->data;
    PetscInt    *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar   *ba               = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt    *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt    nonew = a->nonew;
    MatScalar   *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
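
/*
   Note on the routine above (illustrative remark, not part of the library source): matsetvaluesmpiaij_()
   is a hand-inlined equivalent of the C call

     ierr = MatSetValues(mat,m,im,n,in,v,addv);CHKERRQ(ierr);

   specialized for MATMPIAIJ so that Fortran applications can insert entries in tight assembly loops
   without going through the generic MatSetValues() dispatch; all arguments are passed by reference,
   following the usual Fortran calling convention.
*/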