#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also
   automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
    ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
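        /* rank i gets the row lengths for the rows it will own, i.e. the slice ilen[rowners[i]] .. ilen[rowners[i+1]-1];
           the matching column indices and numerical values for those rows are sent with the same tag, in the same
           rank order, by the two loops below */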
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
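    /* At this point gmataa holds this rank's rows with the columns of each row in ascending global order.
       Within a row the incoming values therefore appear as: the entries left of the diagonal block (these
       belong to the off-diagonal part B), then the diagonal-block entries (these belong to A), then the
       remaining off-diagonal entries (B again). ld[i] records how many entries of local row i lie to the
       left of the diagonal block, which is exactly what the copy loop below uses to split each row between
       the Ao and Ad arrays. */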
  /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
  ld = ((Mat_MPIAIJ*)(mat->data))->ld;
  ad = Ad->a;
  ao = Ao->a;
  if (mat->rmap->n) {
    i  = 0;
    nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
    nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
  }
  for (i=1; i<mat->rmap->n; i++) {
    nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
    nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
  }
  i--;
  if (mat->rmap->n) {
    nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
  }
  if (rank) {
    ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
  }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable
  (each processor holds an order-N integer array) but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be correct and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd   = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 796 797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 803 PetscFunctionBegin; 804 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 805 806 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 807 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 808 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 813 { 814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 816 PetscErrorCode ierr; 817 PetscMPIInt n; 818 PetscInt i,j,rstart,ncols,flg; 819 PetscInt *row,*col; 820 
PetscBool other_disassembled; 821 PetscScalar *val; 822 823 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 824 825 PetscFunctionBegin; 826 if (!aij->donotstash && !mat->nooffprocentries) { 827 while (1) { 828 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 829 if (!flg) break; 830 831 for (i=0; i<n; ) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for (j=i,rstart=row[j]; j<n; j++) { 834 if (row[j] != rstart) break; 835 } 836 if (j < n) ncols = j-i; 837 else ncols = n-i; 838 /* Now assemble all these values with a single function call */ 839 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 840 i = j; 841 } 842 } 843 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 844 } 845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 846 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 847 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 848 if (mat->boundtocpu) { 849 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 
PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= 
*aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 VecScatter Mvctx = a->Mvctx; 1135 1136 PetscFunctionBegin; 1137 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1138 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1155 /* add partial results together */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1162 { 1163 MPI_Comm comm; 1164 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1165 Mat Adia = Aij->A, 
Bdia, Aoff,Boff,*Aoffs,*Boffs; 1166 IS Me,Notme; 1167 PetscErrorCode ierr; 1168 PetscInt M,N,first,last,*notme,i; 1169 PetscBool lf; 1170 PetscMPIInt size; 1171 1172 PetscFunctionBegin; 1173 /* Easy test: symmetric diagonal block */ 1174 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1175 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1176 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1177 if (!*f) PetscFunctionReturn(0); 1178 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1180 if (size == 1) PetscFunctionReturn(0); 1181 1182 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1183 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1184 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1186 for (i=0; i<first; i++) notme[i] = i; 1187 for (i=last; i<M; i++) notme[i-last+first] = i; 1188 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1189 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1190 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1191 Aoff = Aoffs[0]; 1192 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1193 Boff = Boffs[0]; 1194 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1195 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1197 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1199 ierr = PetscFree(notme);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* do nondiagonal part */ 1219 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1220 /* do local part */ 1221 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1222 /* add partial results together */ 1223 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1233 { 1234 PetscErrorCode ierr; 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 1237 PetscFunctionBegin; 1238 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1239 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1245 { 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1251 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1256 { 1257 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 #if defined(PETSC_USE_LOG) 1262 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1263 #endif 1264 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1266 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1268 #if defined(PETSC_USE_CTABLE) 1269 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1270 #else 1271 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1272 #endif 1273 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1274 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1275 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1276 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1277 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1278 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1279 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1280 1281 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1282 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1283 1284 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_HYPRE) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1300 #endif 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1304 PetscFunctionReturn(0); 1305 } 1306 1307 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1308 { 1309 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1310 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1311 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1312 const PetscInt *garray = aij->garray; 1313 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1314 PetscInt *rowlens; 1315 PetscInt *colidxs; 1316 PetscScalar *matvals; 1317 PetscErrorCode ierr; 1318 
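  /* Layout of the binary file written below: a four-entry header (MAT_FILE_CLASSID, M, N, global number
     of nonzeros), then the row lengths of all global rows, then for every row its global column indices
     (first the off-diagonal entries that lie left of the diagonal block, then the diagonal-block entries,
     then the remaining off-diagonal entries, so each row comes out in ascending column order), and
     finally the numerical values in the same order. */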
1319 PetscFunctionBegin; 1320 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1321 1322 M = mat->rmap->N; 1323 N = mat->cmap->N; 1324 m = mat->rmap->n; 1325 rs = mat->rmap->rstart; 1326 cs = mat->cmap->rstart; 1327 nz = A->nz + B->nz; 1328 1329 /* write matrix header */ 1330 header[0] = MAT_FILE_CLASSID; 1331 header[1] = M; header[2] = N; header[3] = nz; 1332 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1333 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1334 1335 /* fill in and store row lengths */ 1336 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1337 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1338 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1339 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1340 1341 /* fill in and store column indices */ 1342 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1343 for (cnt=0, i=0; i<m; i++) { 1344 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1345 if (garray[B->j[jb]] > cs) break; 1346 colidxs[cnt++] = garray[B->j[jb]]; 1347 } 1348 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1349 colidxs[cnt++] = A->j[ja] + cs; 1350 for (; jb<B->i[i+1]; jb++) 1351 colidxs[cnt++] = garray[B->j[jb]]; 1352 } 1353 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1354 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1355 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1356 1357 /* fill in and store nonzero values */ 1358 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1359 for (cnt=0, i=0; i<m; i++) { 1360 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1361 if (garray[B->j[jb]] > cs) break; 1362 matvals[cnt++] = B->a[jb]; 1363 } 1364 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1365 matvals[cnt++] = A->a[ja]; 1366 for (; jb<B->i[i+1]; jb++) 1367 matvals[cnt++] = B->a[jb]; 1368 } 1369 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1370 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1371 ierr = PetscFree(matvals);CHKERRQ(ierr); 1372 1373 /* write block size option to the viewer's .info file */ 1374 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1375 PetscFunctionReturn(0); 1376 } 1377 1378 #include <petscdraw.h> 1379 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1380 { 1381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1382 PetscErrorCode ierr; 1383 PetscMPIInt rank = aij->rank,size = aij->size; 1384 PetscBool isdraw,iascii,isbinary; 1385 PetscViewer sviewer; 1386 PetscViewerFormat format; 1387 1388 PetscFunctionBegin; 1389 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1392 if (iascii) { 1393 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1394 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1395 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1396 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1397 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1398 for (i=0; i<(PetscInt)size; i++) { 1399 nmax = 
PetscMax(nmax,nz[i]); 1400 nmin = PetscMin(nmin,nz[i]); 1401 navg += nz[i]; 1402 } 1403 ierr = PetscFree(nz);CHKERRQ(ierr); 1404 navg = navg/size; 1405 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1406 PetscFunctionReturn(0); 1407 } 1408 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1409 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1410 MatInfo info; 1411 PetscBool inodes; 1412 1413 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1414 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1415 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1416 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1417 if (!inodes) { 1418 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1419 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1420 } else { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } 1424 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1425 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1426 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1428 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1429 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1431 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1432 PetscFunctionReturn(0); 1433 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1434 PetscInt inodecount,inodelimit,*inodes; 1435 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1436 if (inodes) { 1437 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1438 } else { 1439 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1440 } 1441 PetscFunctionReturn(0); 1442 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1443 PetscFunctionReturn(0); 1444 } 1445 } else if (isbinary) { 1446 if (size == 1) { 1447 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1448 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1449 } else { 1450 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } else if (iascii && size == 1) { 1454 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1455 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1456 PetscFunctionReturn(0); 1457 } else if (isdraw) { 1458 PetscDraw draw; 1459 PetscBool isnull; 1460 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1461 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1462 if (isnull) PetscFunctionReturn(0); 1463 } 1464 1465 { /* assemble the entire matrix onto first processor */ 1466 Mat A = NULL, Av; 1467 IS isrow,iscol; 1468 1469 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1470 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1471 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1472 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1473 /* The commented code uses MatCreateSubMatrices instead */ 1474 /* 1475 Mat *AA, A = NULL, Av; 1476 IS isrow,iscol; 1477 1478 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1479 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1480 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1481 if (!rank) { 1482 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1483 A = AA[0]; 1484 Av = AA[0]; 1485 } 1486 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1487 */ 1488 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1489 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1490 /* 1491 Everyone has to call to draw the matrix since the graphics waits are 1492 synchronized across all processors that share the PetscDraw object 1493 */ 1494 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1495 if (!rank) { 1496 if (((PetscObject)mat)->name) { 1497 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 } 1499 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1500 } 1501 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1502 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1503 ierr = MatDestroy(&A);CHKERRQ(ierr); 1504 } 1505 PetscFunctionReturn(0); 1506 } 1507 1508 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1509 { 1510 PetscErrorCode ierr; 1511 PetscBool iascii,isdraw,issocket,isbinary; 1512 1513 PetscFunctionBegin; 1514 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1518 if (iascii || isdraw || isbinary || issocket) { 1519 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1525 { 1526 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1527 PetscErrorCode ierr; 1528 Vec bb1 = 0; 1529 PetscBool hasop; 1530 1531 PetscFunctionBegin; 1532 if (flag == SOR_APPLY_UPPER) { 1533 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1534 PetscFunctionReturn(0); 1535 } 1536 1537 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1538 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1539 } 1540 1541 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1542 if (flag & SOR_ZERO_INITIAL_GUESS) { 1543 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1544 its--; 1545 } 1546 1547 while (its--) { 1548 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1549 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 1551 /* update rhs: bb1 = bb - B*x */ 1552 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1553 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1554 1555 /* local sweep */ 1556 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1557 } 1558 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1559 if (flag & SOR_ZERO_INITIAL_GUESS) { 1560 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1561 its--; 1562 } 1563 while (its--) { 1564 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 1567 /* update rhs: bb1 = bb - B*x */ 1568 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1569 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1570 1571 /* local sweep */ 1572 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1573 } 1574 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1575 if (flag & SOR_ZERO_INITIAL_GUESS) { 1576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1577 its--; 1578 } 1579 while (its--) { 1580 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 1583 /* update rhs: bb1 = bb - B*x */ 1584 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1585 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1586 1587 /* local sweep */ 1588 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1589 } 1590 } else if (flag & SOR_EISENSTAT) { 1591 Vec xx1; 1592 1593 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1594 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1595 1596 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 if (!mat->diag) { 1599 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1600 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1601 } 1602 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1603 if (hasop) { 1604 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1605 } else { 1606 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1607 } 1608 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1609 1610 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1611 1612 /* local sweep */ 1613 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1614 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1615 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1616 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1617 1618 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1619 1620 matin->factorerrortype = mat->A->factorerrortype; 1621 PetscFunctionReturn(0); 1622 } 1623 1624 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1625 { 1626 Mat aA,aB,Aperm; 1627 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1628 PetscScalar *aa,*ba; 1629 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1630 PetscSF rowsf,sf; 1631 IS parcolp = NULL; 1632 PetscBool done; 1633 PetscErrorCode ierr; 
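  /*
     Overview of the permutation algorithm that follows: the row and column permutations are
     inverted with PetscSF reductions so every process learns the destination row/column of the
     entries it owns, the diagonal/off-diagonal nonzero counts of the permuted matrix are then
     computed for preallocation, and finally the entries are inserted with MatSetValues().

     Illustrative caller-side sketch (an assumption about typical usage, not part of this
     routine); it builds identity permutations of the owned rows and columns purely to show the
     calling sequence:

         PetscInt m,n,rstart,cstart;
         IS       rperm,cperm;
         Mat      Aperm;
         ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
         ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
         ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
         ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rperm);CHKERRQ(ierr);
         ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&cperm);CHKERRQ(ierr);
         ierr = MatPermute(A,rperm,cperm,&Aperm);CHKERRQ(ierr);    identity permutation of A
  */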
1634 1635 PetscFunctionBegin; 1636 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1637 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1638 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1639 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1640 1641 /* Invert row permutation to find out where my rows should go */ 1642 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1643 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1644 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1645 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1646 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1647 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1648 1649 /* Invert column permutation to find out where my columns should go */ 1650 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1651 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1652 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1653 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1654 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1655 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1657 1658 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1659 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1660 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1661 1662 /* Find out where my gcols should go */ 1663 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1664 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1665 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1666 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1667 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1668 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1669 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1670 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1671 1672 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1673 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1674 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1675 for (i=0; i<m; i++) { 1676 PetscInt row = rdest[i]; 1677 PetscMPIInt rowner; 1678 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1679 for (j=ai[i]; j<ai[i+1]; j++) { 1680 PetscInt col = cdest[aj[j]]; 1681 PetscMPIInt cowner; 1682 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1683 if (rowner == cowner) dnnz[i]++; 1684 else onnz[i]++; 1685 } 1686 for (j=bi[i]; j<bi[i+1]; j++) { 1687 PetscInt col = gcdest[bj[j]]; 1688 PetscMPIInt cowner; 1689 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1690 if (rowner == cowner) dnnz[i]++; 1691 else onnz[i]++; 1692 } 1693 } 1694 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1695 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1698 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1699 1700 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1701 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1702 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1703 for (i=0; i<m; i++) { 1704 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1705 PetscInt j0,rowlen; 1706 rowlen = ai[i+1] - ai[i]; 1707 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1708 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1709 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1710 } 1711 rowlen = bi[i+1] - bi[i]; 1712 for (j0=j=0; j<rowlen; j0=j) { 1713 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1714 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1715 } 1716 } 1717 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1718 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1720 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1721 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1722 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1723 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1724 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1725 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1726 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1727 *B = Aperm; 1728 PetscFunctionReturn(0); 1729 } 1730 1731 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1732 { 1733 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1734 PetscErrorCode ierr; 1735 1736 PetscFunctionBegin; 1737 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1738 if (ghosts) *ghosts = aij->garray; 1739 PetscFunctionReturn(0); 1740 } 1741 1742 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1743 { 1744 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1745 Mat A = mat->A,B = mat->B; 1746 PetscErrorCode ierr; 1747 PetscLogDouble isend[5],irecv[5]; 1748 1749 PetscFunctionBegin; 1750 info->block_size = 1.0; 1751 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1752 1753 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1754 isend[3] = info->memory; isend[4] = info->mallocs; 1755 1756 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1757 1758 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1759 isend[3] += info->memory; isend[4] += info->mallocs; 1760 if (flag == MAT_LOCAL) { 1761 info->nz_used = isend[0]; 1762 info->nz_allocated = isend[1]; 1763 info->nz_unneeded = isend[2]; 1764 info->memory = isend[3]; 1765 info->mallocs = isend[4]; 1766 } else if (flag == MAT_GLOBAL_MAX) { 1767 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1768 1769 info->nz_used = irecv[0]; 1770 info->nz_allocated = irecv[1]; 1771 info->nz_unneeded = irecv[2]; 1772 info->memory = irecv[3]; 1773 info->mallocs = irecv[4]; 1774 } else if (flag == MAT_GLOBAL_SUM) { 1775 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1776 1777 info->nz_used = irecv[0]; 1778 info->nz_allocated = irecv[1]; 1779 info->nz_unneeded = irecv[2]; 1780 info->memory = irecv[3]; 1781 
info->mallocs = irecv[4]; 1782 } 1783 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1784 info->fill_ratio_needed = 0; 1785 info->factor_mallocs = 0; 1786 PetscFunctionReturn(0); 1787 } 1788 1789 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1790 { 1791 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1792 PetscErrorCode ierr; 1793 1794 PetscFunctionBegin; 1795 switch (op) { 1796 case MAT_NEW_NONZERO_LOCATIONS: 1797 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1798 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1799 case MAT_KEEP_NONZERO_PATTERN: 1800 case MAT_NEW_NONZERO_LOCATION_ERR: 1801 case MAT_USE_INODES: 1802 case MAT_IGNORE_ZERO_ENTRIES: 1803 MatCheckPreallocated(A,1); 1804 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1805 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1806 break; 1807 case MAT_ROW_ORIENTED: 1808 MatCheckPreallocated(A,1); 1809 a->roworiented = flg; 1810 1811 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1812 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1813 break; 1814 case MAT_NEW_DIAGONALS: 1815 case MAT_SORTED_FULL: 1816 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1817 break; 1818 case MAT_IGNORE_OFF_PROC_ENTRIES: 1819 a->donotstash = flg; 1820 break; 1821 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1822 case MAT_SPD: 1823 case MAT_SYMMETRIC: 1824 case MAT_STRUCTURALLY_SYMMETRIC: 1825 case MAT_HERMITIAN: 1826 case MAT_SYMMETRY_ETERNAL: 1827 break; 1828 case MAT_SUBMAT_SINGLEIS: 1829 A->submat_singleis = flg; 1830 break; 1831 case MAT_STRUCTURE_ONLY: 1832 /* The option is handled directly by MatSetOption() */ 1833 break; 1834 default: 1835 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1836 } 1837 PetscFunctionReturn(0); 1838 } 1839 1840 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1841 { 1842 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1843 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1844 PetscErrorCode ierr; 1845 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1846 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1847 PetscInt *cmap,*idx_p; 1848 1849 PetscFunctionBegin; 1850 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1851 mat->getrowactive = PETSC_TRUE; 1852 1853 if (!mat->rowvalues && (idx || v)) { 1854 /* 1855 allocate enough space to hold information from the longest row. 
1856 */ 1857 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1858 PetscInt max = 1,tmp; 1859 for (i=0; i<matin->rmap->n; i++) { 1860 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1861 if (max < tmp) max = tmp; 1862 } 1863 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1864 } 1865 1866 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1867 lrow = row - rstart; 1868 1869 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1870 if (!v) {pvA = 0; pvB = 0;} 1871 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1872 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1873 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1874 nztot = nzA + nzB; 1875 1876 cmap = mat->garray; 1877 if (v || idx) { 1878 if (nztot) { 1879 /* Sort by increasing column numbers, assuming A and B already sorted */ 1880 PetscInt imark = -1; 1881 if (v) { 1882 *v = v_p = mat->rowvalues; 1883 for (i=0; i<nzB; i++) { 1884 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1885 else break; 1886 } 1887 imark = i; 1888 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1889 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1890 } 1891 if (idx) { 1892 *idx = idx_p = mat->rowindices; 1893 if (imark > -1) { 1894 for (i=0; i<imark; i++) { 1895 idx_p[i] = cmap[cworkB[i]]; 1896 } 1897 } else { 1898 for (i=0; i<nzB; i++) { 1899 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1900 else break; 1901 } 1902 imark = i; 1903 } 1904 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1905 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1906 } 1907 } else { 1908 if (idx) *idx = 0; 1909 if (v) *v = 0; 1910 } 1911 } 1912 *nz = nztot; 1913 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1914 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1915 PetscFunctionReturn(0); 1916 } 1917 1918 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1919 { 1920 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1921 1922 PetscFunctionBegin; 1923 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1924 aij->getrowactive = PETSC_FALSE; 1925 PetscFunctionReturn(0); 1926 } 1927 1928 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1929 { 1930 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1931 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1932 PetscErrorCode ierr; 1933 PetscInt i,j,cstart = mat->cmap->rstart; 1934 PetscReal sum = 0.0; 1935 MatScalar *v; 1936 1937 PetscFunctionBegin; 1938 if (aij->size == 1) { 1939 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1940 } else { 1941 if (type == NORM_FROBENIUS) { 1942 v = amat->a; 1943 for (i=0; i<amat->nz; i++) { 1944 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1945 } 1946 v = bmat->a; 1947 for (i=0; i<bmat->nz; i++) { 1948 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1949 } 1950 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1951 *norm = PetscSqrtReal(*norm); 1952 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1953 } else if (type == NORM_1) { /* max column norm */ 1954 PetscReal *tmp,*tmp2; 1955 PetscInt *jj,*garray = aij->garray; 1956 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1957 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1958 *norm = 0.0; 
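      /* NORM_1 is the maximum column sum: ||A||_1 = max_j sum_i |a_ij|.  Each process
         accumulates |a_ij| over its locally owned rows into a dense work array indexed by
         global column (diagonal block offset by cstart, off-diagonal block mapped through
         garray), the per-process arrays are summed with MPIU_Allreduce(), and the largest
         entry of the result is returned. */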
1959 v = amat->a; jj = amat->j; 1960 for (j=0; j<amat->nz; j++) { 1961 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1962 } 1963 v = bmat->a; jj = bmat->j; 1964 for (j=0; j<bmat->nz; j++) { 1965 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1966 } 1967 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1968 for (j=0; j<mat->cmap->N; j++) { 1969 if (tmp2[j] > *norm) *norm = tmp2[j]; 1970 } 1971 ierr = PetscFree(tmp);CHKERRQ(ierr); 1972 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1973 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1974 } else if (type == NORM_INFINITY) { /* max row norm */ 1975 PetscReal ntemp = 0.0; 1976 for (j=0; j<aij->A->rmap->n; j++) { 1977 v = amat->a + amat->i[j]; 1978 sum = 0.0; 1979 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1980 sum += PetscAbsScalar(*v); v++; 1981 } 1982 v = bmat->a + bmat->i[j]; 1983 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1984 sum += PetscAbsScalar(*v); v++; 1985 } 1986 if (sum > ntemp) ntemp = sum; 1987 } 1988 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1990 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1991 } 1992 PetscFunctionReturn(0); 1993 } 1994 1995 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1996 { 1997 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1998 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 1999 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2000 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2001 PetscErrorCode ierr; 2002 Mat B,A_diag,*B_diag; 2003 const MatScalar *array; 2004 2005 PetscFunctionBegin; 2006 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2007 ai = Aloc->i; aj = Aloc->j; 2008 bi = Bloc->i; bj = Bloc->j; 2009 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2010 PetscInt *d_nnz,*g_nnz,*o_nnz; 2011 PetscSFNode *oloc; 2012 PETSC_UNUSED PetscSF sf; 2013 2014 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2015 /* compute d_nnz for preallocation */ 2016 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2017 for (i=0; i<ai[ma]; i++) { 2018 d_nnz[aj[i]]++; 2019 } 2020 /* compute local off-diagonal contributions */ 2021 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2022 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2023 /* map those to global */ 2024 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2025 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2026 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2027 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2028 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2029 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2030 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2031 2032 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2033 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2034 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2035 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2036 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2037 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2038 } else { 2039 B = *matout; 2040 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2041 } 2042 2043 b = (Mat_MPIAIJ*)B->data; 2044 A_diag = a->A; 2045 B_diag = &b->A; 2046 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2047 A_diag_ncol = A_diag->cmap->N; 2048 B_diag_ilen = sub_B_diag->ilen; 2049 B_diag_i = sub_B_diag->i; 2050 2051 /* Set ilen for diagonal of B */ 2052 for (i=0; i<A_diag_ncol; i++) { 2053 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2054 } 2055 2056 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2057 very quickly (=without using MatSetValues), because all writes are local. */ 2058 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2059 2060 /* copy over the B part */ 2061 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2062 array = Bloc->a; 2063 row = A->rmap->rstart; 2064 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2065 cols_tmp = cols; 2066 for (i=0; i<mb; i++) { 2067 ncol = bi[i+1]-bi[i]; 2068 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2069 row++; 2070 array += ncol; cols_tmp += ncol; 2071 } 2072 ierr = PetscFree(cols);CHKERRQ(ierr); 2073 2074 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2075 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2077 *matout = B; 2078 } else { 2079 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2085 { 2086 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2087 Mat a = aij->A,b = aij->B; 2088 PetscErrorCode ierr; 2089 PetscInt s1,s2,s3; 2090 2091 PetscFunctionBegin; 2092 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2093 if (rr) { 2094 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2095 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2096 /* Overlap communication with computation. 
*/ 2097 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2098 } 2099 if (ll) { 2100 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2101 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2102 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2103 } 2104 /* scale the diagonal block */ 2105 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2106 2107 if (rr) { 2108 /* Do a scatter end and then right scale the off-diagonal block */ 2109 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2110 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2111 } 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2116 { 2117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2118 PetscErrorCode ierr; 2119 2120 PetscFunctionBegin; 2121 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2126 { 2127 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2128 Mat a,b,c,d; 2129 PetscBool flg; 2130 PetscErrorCode ierr; 2131 2132 PetscFunctionBegin; 2133 a = matA->A; b = matA->B; 2134 c = matB->A; d = matB->B; 2135 2136 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2137 if (flg) { 2138 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2139 } 2140 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2141 PetscFunctionReturn(0); 2142 } 2143 2144 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2145 { 2146 PetscErrorCode ierr; 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2148 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2149 2150 PetscFunctionBegin; 2151 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2152 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2153 /* because of the column compression in the off-processor part of the matrix a->B, 2154 the number of columns in a->B and b->B may be different, hence we cannot call 2155 the MatCopy() directly on the two parts. If need be, we can provide a more 2156 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2157 then copying the submatrices */ 2158 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2159 } else { 2160 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2161 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2162 } 2163 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2164 PetscFunctionReturn(0); 2165 } 2166 2167 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2168 { 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2173 PetscFunctionReturn(0); 2174 } 2175 2176 /* 2177 Computes the number of nonzeros per row needed for preallocation when X and Y 2178 have different nonzero structure. 
2179 */ 2180 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2181 { 2182 PetscInt i,j,k,nzx,nzy; 2183 2184 PetscFunctionBegin; 2185 /* Set the number of nonzeros in the new matrix */ 2186 for (i=0; i<m; i++) { 2187 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2188 nzx = xi[i+1] - xi[i]; 2189 nzy = yi[i+1] - yi[i]; 2190 nnz[i] = 0; 2191 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2192 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2193 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2194 nnz[i]++; 2195 } 2196 for (; k<nzy; k++) nnz[i]++; 2197 } 2198 PetscFunctionReturn(0); 2199 } 2200 2201 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2202 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2203 { 2204 PetscErrorCode ierr; 2205 PetscInt m = Y->rmap->N; 2206 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2207 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2208 2209 PetscFunctionBegin; 2210 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2215 { 2216 PetscErrorCode ierr; 2217 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2218 PetscBLASInt bnz,one=1; 2219 Mat_SeqAIJ *x,*y; 2220 2221 PetscFunctionBegin; 2222 if (str == SAME_NONZERO_PATTERN) { 2223 PetscScalar alpha = a; 2224 x = (Mat_SeqAIJ*)xx->A->data; 2225 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2226 y = (Mat_SeqAIJ*)yy->A->data; 2227 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2228 x = (Mat_SeqAIJ*)xx->B->data; 2229 y = (Mat_SeqAIJ*)yy->B->data; 2230 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2231 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2232 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2233 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2234 will be updated */ 2235 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2236 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2237 Y->offloadmask = PETSC_OFFLOAD_CPU; 2238 } 2239 #endif 2240 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2241 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2242 } else { 2243 Mat B; 2244 PetscInt *nnz_d,*nnz_o; 2245 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2246 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2247 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2248 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2249 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2250 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2251 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2252 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2254 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2255 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2256 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2257 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
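      /* At this point B holds Y + a*X with the union nonzero pattern and has already replaced
         Y's internal data via MatHeaderReplace(); only the temporary preallocation arrays
         remain to be freed.  Illustrative caller-side usage (an assumption, not part of this
         routine):

             ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);    Y <- Y + alpha*X
      */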
2258 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2259 } 2260 PetscFunctionReturn(0); 2261 } 2262 2263 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2264 2265 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2266 { 2267 #if defined(PETSC_USE_COMPLEX) 2268 PetscErrorCode ierr; 2269 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2270 2271 PetscFunctionBegin; 2272 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2273 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2274 #else 2275 PetscFunctionBegin; 2276 #endif 2277 PetscFunctionReturn(0); 2278 } 2279 2280 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2281 { 2282 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2283 PetscErrorCode ierr; 2284 2285 PetscFunctionBegin; 2286 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2287 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2292 { 2293 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2294 PetscErrorCode ierr; 2295 2296 PetscFunctionBegin; 2297 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2298 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2303 { 2304 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2305 PetscErrorCode ierr; 2306 PetscInt i,*idxb = 0; 2307 PetscScalar *va,*vb; 2308 Vec vtmp; 2309 2310 PetscFunctionBegin; 2311 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2312 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2313 if (idx) { 2314 for (i=0; i<A->rmap->n; i++) { 2315 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2316 } 2317 } 2318 2319 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2320 if (idx) { 2321 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2322 } 2323 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2324 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2325 2326 for (i=0; i<A->rmap->n; i++) { 2327 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2328 va[i] = vb[i]; 2329 if (idx) idx[i] = a->garray[idxb[i]]; 2330 } 2331 } 2332 2333 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2334 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2335 ierr = PetscFree(idxb);CHKERRQ(ierr); 2336 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->cmap->n; i++) { 2353 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2379 { 2380 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2381 PetscInt n = A->rmap->n; 2382 PetscInt cstart = A->cmap->rstart; 2383 PetscInt *cmap = mat->garray; 2384 PetscInt *diagIdx, *offdiagIdx; 2385 Vec diagV, offdiagV; 2386 PetscScalar *a, *diagA, *offdiagA; 2387 PetscInt r; 2388 PetscErrorCode ierr; 2389 2390 PetscFunctionBegin; 2391 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2392 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2394 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2395 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2397 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2398 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2399 for (r = 0; r < n; ++r) { 2400 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2401 a[r] = diagA[r]; 2402 idx[r] = cstart + diagIdx[r]; 2403 } else { 2404 a[r] = offdiagA[r]; 2405 idx[r] = cmap[offdiagIdx[r]]; 2406 } 2407 } 2408 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2409 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2411 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2412 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2413 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2414 PetscFunctionReturn(0); 2415 } 2416 2417 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2418 { 2419 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2420 PetscInt n = A->rmap->n; 2421 PetscInt cstart = A->cmap->rstart; 2422 PetscInt *cmap = mat->garray; 2423 PetscInt *diagIdx, *offdiagIdx; 2424 Vec diagV, offdiagV; 2425 PetscScalar *a, *diagA, *offdiagA; 2426 PetscInt r; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2433 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2434 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2436 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2437 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2438 for (r = 0; r < n; ++r) { 2439 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2440 a[r] = diagA[r]; 2441 idx[r] = cstart + diagIdx[r]; 2442 } else { 2443 a[r] = offdiagA[r]; 2444 idx[r] = cmap[offdiagIdx[r]]; 2445 } 2446 } 2447 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2450 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2451 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2452 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2453 PetscFunctionReturn(0); 2454 } 2455 2456 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2457 { 2458 PetscErrorCode ierr; 2459 Mat *dummy; 2460 2461 PetscFunctionBegin; 2462 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2463 *newmat = *dummy; 2464 ierr = PetscFree(dummy);CHKERRQ(ierr); 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2469 { 2470 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2471 PetscErrorCode ierr; 2472 2473 PetscFunctionBegin; 2474 ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2475 A->factorerrortype = a->A->factorerrortype; 2476 PetscFunctionReturn(0); 2477 } 2478 2479 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2480 { 2481 PetscErrorCode ierr; 2482 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2483 2484 PetscFunctionBegin; 2485 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2486 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2487 if (x->assembled) { 2488 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2489 } else { 2490 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2491 } 2492 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2493 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2498 { 2499 PetscFunctionBegin; 2500 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2501 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2502 PetscFunctionReturn(0); 2503 } 2504 2505 /*@ 2506 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2507 2508 Collective on Mat 2509 2510 Input Parameters: 2511 + A - the matrix 2512 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2513 2514 Level: advanced 2515 2516 @*/ 2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2518 { 2519 PetscErrorCode ierr; 2520 2521 PetscFunctionBegin; 2522 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2527 { 2528 PetscErrorCode ierr; 2529 PetscBool sc = PETSC_FALSE,flg; 2530 2531 PetscFunctionBegin; 2532 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2533 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2534 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2535 if (flg) { 2536 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2537 } 2538 ierr = PetscOptionsTail();CHKERRQ(ierr); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2543 { 2544 PetscErrorCode ierr; 2545 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2546 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2547 2548 PetscFunctionBegin; 2549 if (!Y->preallocated) { 2550 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2551 } else if (!aij->nz) { 2552 PetscInt nonew = aij->nonew; 2553 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2554 aij->nonew = nonew; 2555 } 2556 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2557 PetscFunctionReturn(0); 2558 } 2559 2560 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2561 { 2562 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2563 PetscErrorCode ierr; 2564 2565 PetscFunctionBegin; 2566 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2567 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2568 if (d) { 2569 PetscInt rstart; 
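      /* the sequential diagonal block reports a local row index; shift it by the start of this
         process's ownership range so that *d is returned as a global row index */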
2570 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2571 *d += rstart; 2572 2573 } 2574 PetscFunctionReturn(0); 2575 } 2576 2577 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2580 PetscErrorCode ierr; 2581 2582 PetscFunctionBegin; 2583 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2584 PetscFunctionReturn(0); 2585 } 2586 2587 /* -------------------------------------------------------------------*/ 2588 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2589 MatGetRow_MPIAIJ, 2590 MatRestoreRow_MPIAIJ, 2591 MatMult_MPIAIJ, 2592 /* 4*/ MatMultAdd_MPIAIJ, 2593 MatMultTranspose_MPIAIJ, 2594 MatMultTransposeAdd_MPIAIJ, 2595 0, 2596 0, 2597 0, 2598 /*10*/ 0, 2599 0, 2600 0, 2601 MatSOR_MPIAIJ, 2602 MatTranspose_MPIAIJ, 2603 /*15*/ MatGetInfo_MPIAIJ, 2604 MatEqual_MPIAIJ, 2605 MatGetDiagonal_MPIAIJ, 2606 MatDiagonalScale_MPIAIJ, 2607 MatNorm_MPIAIJ, 2608 /*20*/ MatAssemblyBegin_MPIAIJ, 2609 MatAssemblyEnd_MPIAIJ, 2610 MatSetOption_MPIAIJ, 2611 MatZeroEntries_MPIAIJ, 2612 /*24*/ MatZeroRows_MPIAIJ, 2613 0, 2614 0, 2615 0, 2616 0, 2617 /*29*/ MatSetUp_MPIAIJ, 2618 0, 2619 0, 2620 MatGetDiagonalBlock_MPIAIJ, 2621 0, 2622 /*34*/ MatDuplicate_MPIAIJ, 2623 0, 2624 0, 2625 0, 2626 0, 2627 /*39*/ MatAXPY_MPIAIJ, 2628 MatCreateSubMatrices_MPIAIJ, 2629 MatIncreaseOverlap_MPIAIJ, 2630 MatGetValues_MPIAIJ, 2631 MatCopy_MPIAIJ, 2632 /*44*/ MatGetRowMax_MPIAIJ, 2633 MatScale_MPIAIJ, 2634 MatShift_MPIAIJ, 2635 MatDiagonalSet_MPIAIJ, 2636 MatZeroRowsColumns_MPIAIJ, 2637 /*49*/ MatSetRandom_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 0, 2642 /*54*/ MatFDColoringCreate_MPIXAIJ, 2643 0, 2644 MatSetUnfactored_MPIAIJ, 2645 MatPermute_MPIAIJ, 2646 0, 2647 /*59*/ MatCreateSubMatrix_MPIAIJ, 2648 MatDestroy_MPIAIJ, 2649 MatView_MPIAIJ, 2650 0, 2651 0, 2652 /*64*/ 0, 2653 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2654 0, 2655 0, 2656 0, 2657 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2658 MatGetRowMinAbs_MPIAIJ, 2659 0, 2660 0, 2661 0, 2662 0, 2663 /*75*/ MatFDColoringApply_AIJ, 2664 MatSetFromOptions_MPIAIJ, 2665 0, 2666 0, 2667 MatFindZeroDiagonals_MPIAIJ, 2668 /*80*/ 0, 2669 0, 2670 0, 2671 /*83*/ MatLoad_MPIAIJ, 2672 MatIsSymmetric_MPIAIJ, 2673 0, 2674 0, 2675 0, 2676 0, 2677 /*89*/ 0, 2678 0, 2679 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2680 0, 2681 0, 2682 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 0, 2686 MatBindToCPU_MPIAIJ, 2687 /*99*/ MatProductSetFromOptions_MPIAIJ, 2688 0, 2689 0, 2690 MatConjugate_MPIAIJ, 2691 0, 2692 /*104*/MatSetValuesRow_MPIAIJ, 2693 MatRealPart_MPIAIJ, 2694 MatImaginaryPart_MPIAIJ, 2695 0, 2696 0, 2697 /*109*/0, 2698 0, 2699 MatGetRowMin_MPIAIJ, 2700 0, 2701 MatMissingDiagonal_MPIAIJ, 2702 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2703 0, 2704 MatGetGhosts_MPIAIJ, 2705 0, 2706 0, 2707 /*119*/0, 2708 0, 2709 0, 2710 0, 2711 MatGetMultiProcBlock_MPIAIJ, 2712 /*124*/MatFindNonzeroRows_MPIAIJ, 2713 MatGetColumnNorms_MPIAIJ, 2714 MatInvertBlockDiagonal_MPIAIJ, 2715 MatInvertVariableBlockDiagonal_MPIAIJ, 2716 MatCreateSubMatricesMPI_MPIAIJ, 2717 /*129*/0, 2718 0, 2719 0, 2720 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 /*134*/0, 2723 0, 2724 0, 2725 0, 2726 0, 2727 /*139*/MatSetBlockSizes_MPIAIJ, 2728 0, 2729 0, 2730 MatFDColoringSetUp_MPIXAIJ, 2731 MatFindOffBlockDiagonalEntries_MPIAIJ, 2732 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2733 /*145*/0, 2734 0, 2735 0 2736 }; 2737 2738 /* 
----------------------------------------------------------------------------------------*/ 2739 2740 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2741 { 2742 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2743 PetscErrorCode ierr; 2744 2745 PetscFunctionBegin; 2746 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2747 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2748 PetscFunctionReturn(0); 2749 } 2750 2751 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2752 { 2753 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2754 PetscErrorCode ierr; 2755 2756 PetscFunctionBegin; 2757 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2758 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2759 PetscFunctionReturn(0); 2760 } 2761 2762 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2763 { 2764 Mat_MPIAIJ *b; 2765 PetscErrorCode ierr; 2766 PetscMPIInt size; 2767 2768 PetscFunctionBegin; 2769 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2770 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2771 b = (Mat_MPIAIJ*)B->data; 2772 2773 #if defined(PETSC_USE_CTABLE) 2774 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2775 #else 2776 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2777 #endif 2778 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2779 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2780 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2781 2782 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2783 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2784 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2785 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2786 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2787 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2788 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2789 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2790 2791 if (!B->preallocated) { 2792 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2793 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2794 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2795 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2796 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2797 } 2798 2799 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2800 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2801 B->preallocated = PETSC_TRUE; 2802 B->was_assembled = PETSC_FALSE; 2803 B->assembled = PETSC_FALSE; 2804 PetscFunctionReturn(0); 2805 } 2806 2807 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2808 { 2809 Mat_MPIAIJ *b; 2810 PetscErrorCode ierr; 2811 2812 PetscFunctionBegin; 2813 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2814 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2815 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2816 b = (Mat_MPIAIJ*)B->data; 2817 2818 #if defined(PETSC_USE_CTABLE) 2819 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2820 #else 2821 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2822 #endif 2823 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2824 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2825 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2826 2827 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2828 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2829 B->preallocated = PETSC_TRUE; 2830 B->was_assembled = PETSC_FALSE; 2831 B->assembled = PETSC_FALSE; 2832 PetscFunctionReturn(0); 2833 } 2834 2835 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2836 { 2837 Mat mat; 2838 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2839 PetscErrorCode ierr; 2840 2841 PetscFunctionBegin; 2842 *newmat = 0; 2843 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2844 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2845 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2846 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2847 a = (Mat_MPIAIJ*)mat->data; 2848 2849 mat->factortype = matin->factortype; 2850 mat->assembled = matin->assembled; 2851 mat->insertmode = NOT_SET_VALUES; 2852 mat->preallocated = matin->preallocated; 2853 2854 a->size = oldmat->size; 2855 a->rank = oldmat->rank; 2856 a->donotstash = oldmat->donotstash; 2857 a->roworiented = oldmat->roworiented; 2858 a->rowindices = NULL; 2859 a->rowvalues = NULL; 2860 a->getrowactive = PETSC_FALSE; 2861 2862 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2863 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2864 2865 if (oldmat->colmap) { 2866 #if defined(PETSC_USE_CTABLE) 2867 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2868 #else 2869 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2870 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2871 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2872 #endif 2873 } else a->colmap = NULL; 2874 if (oldmat->garray) { 2875 PetscInt len; 2876 len = oldmat->B->cmap->n; 2877 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2878 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2879 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2880 } else a->garray = NULL; 2881 2882 /* It may happen MatDuplicate is called with a non-assembled matrix 2883 In fact, MatDuplicate only requires the matrix to be preallocated 2884 This may happen inside a DMCreateMatrix_Shell */ 2885 if (oldmat->lvec) { 2886 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2887 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2888 } 2889 if (oldmat->Mvctx) { 2890 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2891 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2892 } 2893 if (oldmat->Mvctx_mpi1) { 2894 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2895 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2896 } 2897 2898 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2899 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2900 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2902 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2903 *newmat = mat; 2904 PetscFunctionReturn(0); 2905 } 2906 2907 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2908 { 2909 PetscBool isbinary, ishdf5; 2910 PetscErrorCode ierr; 2911 2912 PetscFunctionBegin; 2913 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2914 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2915 /* force binary viewer to load .info file if it has not yet done so */ 2916 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2917 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2918 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2919 if (isbinary) { 2920 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2921 } else if (ishdf5) { 2922 #if defined(PETSC_HAVE_HDF5) 2923 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2924 #else 2925 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2926 #endif 2927 } else { 2928 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2929 } 2930 PetscFunctionReturn(0); 2931 } 2932 2933 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2934 { 2935 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2936 PetscInt *rowidxs,*colidxs; 2937 PetscScalar *matvals; 2938 PetscErrorCode ierr; 2939 2940 PetscFunctionBegin; 2941 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2942 2943 /* read in matrix header */ 2944 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2945 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2946 M = header[1]; N = header[2]; nz = header[3]; 2947 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2948 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2949 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2950 2951 /* set block sizes from the viewer's .info file */ 2952 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2953 /* set global sizes if not set already */ 2954 if (mat->rmap->N < 0) mat->rmap->N = M; 2955 if (mat->cmap->N < 0) mat->cmap->N = N; 2956 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2957 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2958 2959 /* check if the matrix sizes are correct */ 2960 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2961 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2962 2963 /* read in row lengths and build row indices */ 2964 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2965 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2966 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2967 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2968 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2969 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2970 /* read in column indices and matrix values */ 2971 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2972 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2973 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2974 /* store matrix indices and values */ 2975 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2976 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2977 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2978 PetscFunctionReturn(0); 2979 } 2980 2981 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2982 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2983 { 2984 PetscErrorCode ierr; 2985 IS iscol_local; 2986 PetscBool isstride; 2987 PetscMPIInt lisstride=0,gisstride; 2988 2989 PetscFunctionBegin; 2990 /* check if we are grabbing all columns*/ 2991 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2992 2993 if (isstride) { 2994 PetscInt start,len,mstart,mlen; 2995 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2996 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2997 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2998 if (mstart == start && mlen-mstart == len) lisstride = 1; 2999 } 3000 3001 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3002 if (gisstride) { 3003 PetscInt N; 3004 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3005 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3006 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3007 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3008 } else { 3009 PetscInt cbs; 3010 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3011 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3012 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3013 } 3014 3015 *isseq = iscol_local; 3016 PetscFunctionReturn(0); 3017 } 3018 3019 /* 3020 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3021 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3022 3023 Input Parameters: 3024 mat - matrix 3025 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3026 i.e., mat->rstart <= isrow[i] < mat->rend 3027 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3028 i.e., mat->cstart <= iscol[i] < mat->cend 3029 Output Parameter: 3030 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3031 iscol_o - sequential column index set for retrieving mat->B 3032 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3033 */ 3034 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3035 { 3036 PetscErrorCode ierr; 3037 Vec x,cmap; 3038 const PetscInt *is_idx; 3039 PetscScalar *xarray,*cmaparray; 3040 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3041 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3042 Mat B=a->B; 3043 Vec lvec=a->lvec,lcmap; 3044 PetscInt i,cstart,cend,Bn=B->cmap->N; 3045 MPI_Comm comm; 3046 VecScatter Mvctx=a->Mvctx; 3047 3048 PetscFunctionBegin; 3049 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3050 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3051 3052 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3053 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3054 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3055 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3056 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3057 3058 /* Get start indices */ 3059 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3060 isstart -= ncols; 3061 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3062 3063 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3064 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3065 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3066 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3067 for (i=0; i<ncols; i++) { 3068 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3069 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3070 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3071 } 3072 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3073 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3074 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3075 3076 /* Get iscol_d */ 3077 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3078 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3079 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3080 3081 /* Get isrow_d */ 3082 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3083 rstart = mat->rmap->rstart; 3084 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3085 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3086 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3087 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3088 3089 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3090 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3091 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3092 3093 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3094 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3095 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3096 3097 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3098 3099 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3100 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 3102 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3103 /* off-process column indices */ 3104 count = 0; 3105 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3106 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3107 3108 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3109 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3110 for (i=0; i<Bn; i++) { 3111 if (PetscRealPart(xarray[i]) > -1.0) { 3112 idx[count] = i; /* local column index in off-diagonal part B */ 3113 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3114 count++; 3115 } 3116 } 3117 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3118 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3119 3120 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3121 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3122 3123 ierr = PetscFree(idx);CHKERRQ(ierr); 3124 *garray = cmap1; 3125 3126 ierr = VecDestroy(&x);CHKERRQ(ierr); 3127 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3128 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3129 PetscFunctionReturn(0); 3130 } 3131 3132 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3133 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3134 { 3135 PetscErrorCode ierr; 3136 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3137 Mat M = NULL; 3138 MPI_Comm comm; 3139 IS iscol_d,isrow_d,iscol_o; 3140 Mat Asub = NULL,Bsub = NULL; 3141 PetscInt n; 3142 3143 PetscFunctionBegin; 3144 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3145 3146 if (call == MAT_REUSE_MATRIX) { 3147 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3148 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3149 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3150 3151 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3152 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3153 3154 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3155 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3156 3157 /* Update diagonal and off-diagonal portions of submat */ 3158 asub = (Mat_MPIAIJ*)(*submat)->data; 3159 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3160 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3161 if (n) { 3162 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3163 } 3164 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3165 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3166 3167 } else { /* call == MAT_INITIAL_MATRIX) */ 3168 const PetscInt *garray; 3169 PetscInt BsubN; 3170 3171 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3172 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3173 3174 /* Create local submatrices Asub and Bsub */ 3175 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3176 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3177 3178 /* Create submatrix M */ 3179 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3180 3181 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3182 asub = (Mat_MPIAIJ*)M->data; 3183 3184 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3185 n = asub->B->cmap->N; 3186 if (BsubN > n) { 3187 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3188 const PetscInt *idx; 3189 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3190 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3191 3192 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3193 j = 0; 3194 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3195 for (i=0; i<n; i++) { 3196 if (j >= BsubN) break; 3197 while (subgarray[i] > garray[j]) j++; 3198 3199 if (subgarray[i] == garray[j]) { 3200 idx_new[i] = idx[j++]; 3201 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3202 } 3203 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3204 3205 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3206 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3207 3208 } else if (BsubN < n) { 3209 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3210 } 3211 3212 ierr = PetscFree(garray);CHKERRQ(ierr); 3213 *submat = M; 3214 3215 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3216 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3217 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3218 3219 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3220 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3221 3222 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3223 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3224 } 3225 PetscFunctionReturn(0); 3226 } 3227 3228 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3229 { 3230 PetscErrorCode ierr; 3231 IS iscol_local=NULL,isrow_d; 3232 PetscInt csize; 3233 PetscInt n,i,j,start,end; 3234 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3235 MPI_Comm comm; 3236 3237 PetscFunctionBegin; 3238 /* If isrow has same processor distribution as mat, 3239 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3240 if (call == MAT_REUSE_MATRIX) { 3241 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3242 if (isrow_d) { 3243 sameRowDist = PETSC_TRUE; 3244 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3245 } else { 3246 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3247 if (iscol_local) { 3248 sameRowDist = PETSC_TRUE; 3249 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3250 } 3251 } 3252 } else { 3253 /* Check if isrow has same processor distribution as mat */ 3254 sameDist[0] = 
PETSC_FALSE; 3255 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3256 if (!n) { 3257 sameDist[0] = PETSC_TRUE; 3258 } else { 3259 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3260 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3261 if (i >= start && j < end) { 3262 sameDist[0] = PETSC_TRUE; 3263 } 3264 } 3265 3266 /* Check if iscol has same processor distribution as mat */ 3267 sameDist[1] = PETSC_FALSE; 3268 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3269 if (!n) { 3270 sameDist[1] = PETSC_TRUE; 3271 } else { 3272 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3273 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3274 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3275 } 3276 3277 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3278 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3279 sameRowDist = tsameDist[0]; 3280 } 3281 3282 if (sameRowDist) { 3283 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3284 /* isrow and iscol have same processor distribution as mat */ 3285 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3286 PetscFunctionReturn(0); 3287 } else { /* sameRowDist */ 3288 /* isrow has same processor distribution as mat */ 3289 if (call == MAT_INITIAL_MATRIX) { 3290 PetscBool sorted; 3291 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3292 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3293 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3294 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3295 3296 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3297 if (sorted) { 3298 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3299 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3300 PetscFunctionReturn(0); 3301 } 3302 } else { /* call == MAT_REUSE_MATRIX */ 3303 IS iscol_sub; 3304 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3305 if (iscol_sub) { 3306 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3307 PetscFunctionReturn(0); 3308 } 3309 } 3310 } 3311 } 3312 3313 /* General case: iscol -> iscol_local which has global size of iscol */ 3314 if (call == MAT_REUSE_MATRIX) { 3315 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3316 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3317 } else { 3318 if (!iscol_local) { 3319 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3320 } 3321 } 3322 3323 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3324 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3325 3326 if (call == MAT_INITIAL_MATRIX) { 3327 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3328 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3329 } 3330 PetscFunctionReturn(0); 3331 } 3332 3333 /*@C 3334 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3335 and "off-diagonal" part of the matrix in CSR format. 3336 3337 Collective 3338 3339 Input Parameters: 3340 + comm - MPI communicator 3341 . 
A - "diagonal" portion of matrix 3342 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3343 - garray - global index of B columns 3344 3345 Output Parameter: 3346 . mat - the matrix, with input A as its local diagonal matrix 3347 Level: advanced 3348 3349 Notes: 3350 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3351 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3352 3353 .seealso: MatCreateMPIAIJWithSplitArrays() 3354 @*/ 3355 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3356 { 3357 PetscErrorCode ierr; 3358 Mat_MPIAIJ *maij; 3359 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3360 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3361 PetscScalar *oa=b->a; 3362 Mat Bnew; 3363 PetscInt m,n,N; 3364 3365 PetscFunctionBegin; 3366 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3367 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3368 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3369 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3370 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3371 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3372 3373 /* Get global columns of mat */ 3374 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3375 3376 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3377 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3378 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3379 maij = (Mat_MPIAIJ*)(*mat)->data; 3380 3381 (*mat)->preallocated = PETSC_TRUE; 3382 3383 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3384 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3385 3386 /* Set A as diagonal portion of *mat */ 3387 maij->A = A; 3388 3389 nz = oi[m]; 3390 for (i=0; i<nz; i++) { 3391 col = oj[i]; 3392 oj[i] = garray[col]; 3393 } 3394 3395 /* Set Bnew as off-diagonal portion of *mat */ 3396 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3397 bnew = (Mat_SeqAIJ*)Bnew->data; 3398 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3399 maij->B = Bnew; 3400 3401 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3402 3403 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3404 b->free_a = PETSC_FALSE; 3405 b->free_ij = PETSC_FALSE; 3406 ierr = MatDestroy(&B);CHKERRQ(ierr); 3407 3408 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3409 bnew->free_a = PETSC_TRUE; 3410 bnew->free_ij = PETSC_TRUE; 3411 3412 /* condense columns of maij->B */ 3413 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3414 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3415 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3416 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3417 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3418 PetscFunctionReturn(0); 3419 } 3420 3421 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3422 
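/*
   Added commentary (a summary of the routine below, not an exact specification; see the code for details):
   MatCreateSubMatrix_MPIAIJ_SameRowDist() extracts a parallel submatrix when isrow has the same
   processor distribution as mat. For MAT_INITIAL_MATRIX it
     (1) reduces iscol_local to the locally visible columns iscol_sub and records their submatrix
         column numbers in iscmap,
     (2) extracts a sequential submatrix Msub via MatCreateSubMatrices_MPIAIJ_SingleIS_Local(),
     (3) preallocates and assembles the parallel result by translating Msub's columns through iscmap, and
     (4) composes "SubMatrix", "SubIScol" and "Subcmap" with *newmat so that a later
         MAT_REUSE_MATRIX call can retrieve and refill them without recomputing the index sets.
*/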
3423 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3424 { 3425 PetscErrorCode ierr; 3426 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3427 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3428 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3429 Mat M,Msub,B=a->B; 3430 MatScalar *aa; 3431 Mat_SeqAIJ *aij; 3432 PetscInt *garray = a->garray,*colsub,Ncols; 3433 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3434 IS iscol_sub,iscmap; 3435 const PetscInt *is_idx,*cmap; 3436 PetscBool allcolumns=PETSC_FALSE; 3437 MPI_Comm comm; 3438 3439 PetscFunctionBegin; 3440 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3441 3442 if (call == MAT_REUSE_MATRIX) { 3443 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3444 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3445 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3446 3447 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3448 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3449 3450 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3451 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3452 3453 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3454 3455 } else { /* call == MAT_INITIAL_MATRIX) */ 3456 PetscBool flg; 3457 3458 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3459 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3460 3461 /* (1) iscol -> nonscalable iscol_local */ 3462 /* Check for special case: each processor gets entire matrix columns */ 3463 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3464 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3465 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3466 if (allcolumns) { 3467 iscol_sub = iscol_local; 3468 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3469 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3470 3471 } else { 3472 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3473 PetscInt *idx,*cmap1,k; 3474 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3475 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3476 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3477 count = 0; 3478 k = 0; 3479 for (i=0; i<Ncols; i++) { 3480 j = is_idx[i]; 3481 if (j >= cstart && j < cend) { 3482 /* diagonal part of mat */ 3483 idx[count] = j; 3484 cmap1[count++] = i; /* column index in submat */ 3485 } else if (Bn) { 3486 /* off-diagonal part of mat */ 3487 if (j == garray[k]) { 3488 idx[count] = j; 3489 cmap1[count++] = i; /* column index in submat */ 3490 } else if (j > garray[k]) { 3491 while (j > garray[k] && k < Bn-1) k++; 3492 if (j == garray[k]) { 3493 idx[count] = j; 3494 cmap1[count++] = i; /* column index in submat */ 3495 } 3496 } 3497 } 3498 } 3499 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3500 3501 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3502 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3503 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3504 3505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3506 } 3507 3508 /* (3) Create sequential Msub */ 3509 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3510 } 3511 3512 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3513 aij = (Mat_SeqAIJ*)(Msub)->data; 3514 ii = aij->i; 3515 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3516 3517 /* 3518 m - number of local rows 3519 Ncols - number of columns (same on all processors) 3520 rstart - first row in new global matrix generated 3521 */ 3522 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3523 3524 if (call == MAT_INITIAL_MATRIX) { 3525 /* (4) Create parallel newmat */ 3526 PetscMPIInt rank,size; 3527 PetscInt csize; 3528 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3531 3532 /* 3533 Determine the number of non-zeros in the diagonal and off-diagonal 3534 portions of the matrix in order to do correct preallocation 3535 */ 3536 3537 /* first get start and end of "diagonal" columns */ 3538 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3539 if (csize == PETSC_DECIDE) { 3540 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3541 if (mglobal == Ncols) { /* square matrix */ 3542 nlocal = m; 3543 } else { 3544 nlocal = Ncols/size + ((Ncols % size) > rank); 3545 } 3546 } else { 3547 nlocal = csize; 3548 } 3549 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3550 rstart = rend - nlocal; 3551 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3552 3553 /* next, compute all the lengths */ 3554 jj = aij->j; 3555 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3556 olens = dlens + m; 3557 for (i=0; i<m; i++) { 3558 jend = ii[i+1] - ii[i]; 3559 olen = 0; 3560 dlen = 0; 3561 for (j=0; j<jend; j++) { 3562 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3563 else dlen++; 3564 jj++; 3565 } 3566 olens[i] = olen; 3567 dlens[i] = dlen; 3568 } 3569 3570 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3571 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3572 3573 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3574 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
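    /* dlens/olens computed above count, for each local row of Msub, the entries whose
       submatrix column cmap[.] falls inside [rstart,rend) versus outside it, so the
       MatMPIAIJSetPreallocation() call below should be an exact preallocation for M */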
3575 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3576 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3577 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3578 ierr = PetscFree(dlens);CHKERRQ(ierr); 3579 3580 } else { /* call == MAT_REUSE_MATRIX */ 3581 M = *newmat; 3582 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3583 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3584 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3585 /* 3586 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3587 rather than the slower MatSetValues(). 3588 */ 3589 M->was_assembled = PETSC_TRUE; 3590 M->assembled = PETSC_FALSE; 3591 } 3592 3593 /* (5) Set values of Msub to *newmat */ 3594 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3595 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3596 3597 jj = aij->j; 3598 aa = aij->a; 3599 for (i=0; i<m; i++) { 3600 row = rstart + i; 3601 nz = ii[i+1] - ii[i]; 3602 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3603 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3604 jj += nz; aa += nz; 3605 } 3606 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3607 3608 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3609 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 3611 ierr = PetscFree(colsub);CHKERRQ(ierr); 3612 3613 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3614 if (call == MAT_INITIAL_MATRIX) { 3615 *newmat = M; 3616 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3617 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3618 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3620 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3621 3622 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3623 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3624 3625 if (iscol_local) { 3626 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3627 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3628 } 3629 } 3630 PetscFunctionReturn(0); 3631 } 3632 3633 /* 3634 Not great since it makes two copies of the submatrix, first an SeqAIJ 3635 in local and then by concatenating the local matrices the end result. 3636 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3637 3638 Note: This requires a sequential iscol with all indices. 
3639 */ 3640 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3641 { 3642 PetscErrorCode ierr; 3643 PetscMPIInt rank,size; 3644 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3645 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3646 Mat M,Mreuse; 3647 MatScalar *aa,*vwork; 3648 MPI_Comm comm; 3649 Mat_SeqAIJ *aij; 3650 PetscBool colflag,allcolumns=PETSC_FALSE; 3651 3652 PetscFunctionBegin; 3653 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3656 3657 /* Check for special case: each processor gets entire matrix columns */ 3658 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3659 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3660 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3661 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3662 3663 if (call == MAT_REUSE_MATRIX) { 3664 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3665 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3666 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3667 } else { 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3669 } 3670 3671 /* 3672 m - number of local rows 3673 n - number of columns (same on all processors) 3674 rstart - first row in new global matrix generated 3675 */ 3676 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3677 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3678 if (call == MAT_INITIAL_MATRIX) { 3679 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3680 ii = aij->i; 3681 jj = aij->j; 3682 3683 /* 3684 Determine the number of non-zeros in the diagonal and off-diagonal 3685 portions of the matrix in order to do correct preallocation 3686 */ 3687 3688 /* first get start and end of "diagonal" columns */ 3689 if (csize == PETSC_DECIDE) { 3690 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3691 if (mglobal == n) { /* square matrix */ 3692 nlocal = m; 3693 } else { 3694 nlocal = n/size + ((n % size) > rank); 3695 } 3696 } else { 3697 nlocal = csize; 3698 } 3699 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3700 rstart = rend - nlocal; 3701 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3702 3703 /* next, compute all the lengths */ 3704 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3705 olens = dlens + m; 3706 for (i=0; i<m; i++) { 3707 jend = ii[i+1] - ii[i]; 3708 olen = 0; 3709 dlen = 0; 3710 for (j=0; j<jend; j++) { 3711 if (*jj < rstart || *jj >= rend) olen++; 3712 else dlen++; 3713 jj++; 3714 } 3715 olens[i] = olen; 3716 dlens[i] = dlen; 3717 } 3718 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3719 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3720 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3721 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3722 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3723 ierr = PetscFree(dlens);CHKERRQ(ierr); 3724 } else { 3725 PetscInt ml,nl; 3726 3727 M = *newmat; 3728 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3729 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3730 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3731 /* 3732 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3733 rather than the slower MatSetValues(). 3734 */ 3735 M->was_assembled = PETSC_TRUE; 3736 M->assembled = PETSC_FALSE; 3737 } 3738 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3739 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3740 ii = aij->i; 3741 jj = aij->j; 3742 aa = aij->a; 3743 for (i=0; i<m; i++) { 3744 row = rstart + i; 3745 nz = ii[i+1] - ii[i]; 3746 cwork = jj; jj += nz; 3747 vwork = aa; aa += nz; 3748 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3749 } 3750 3751 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 *newmat = M; 3754 3755 /* save submatrix used in processor for next request */ 3756 if (call == MAT_INITIAL_MATRIX) { 3757 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3758 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3759 } 3760 PetscFunctionReturn(0); 3761 } 3762 3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3764 { 3765 PetscInt m,cstart, cend,j,nnz,i,d; 3766 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3767 const PetscInt *JJ; 3768 PetscErrorCode ierr; 3769 PetscBool nooffprocentries; 3770 3771 PetscFunctionBegin; 3772 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3773 3774 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3775 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3776 m = B->rmap->n; 3777 cstart = B->cmap->rstart; 3778 cend = B->cmap->rend; 3779 rstart = B->rmap->rstart; 3780 3781 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3782 3783 if (PetscDefined(USE_DEBUG)) { 3784 for (i=0; i<m; i++) { 3785 nnz = Ii[i+1]- Ii[i]; 3786 JJ = J + Ii[i]; 3787 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3788 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3789 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3790 } 3791 } 3792 3793 for (i=0; i<m; i++) { 3794 nnz = Ii[i+1]- Ii[i]; 3795 JJ = J + Ii[i]; 3796 nnz_max = PetscMax(nnz_max,nnz); 3797 d = 0; 3798 for (j=0; j<nnz; j++) { 3799 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3800 } 3801 d_nnz[i] = d; 3802 o_nnz[i] = nnz - d; 3803 } 3804 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3805 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3806 3807 for (i=0; i<m; i++) { 3808 ii = i + rstart; 3809 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3810 } 3811 nooffprocentries = B->nooffprocentries; 3812 B->nooffprocentries = PETSC_TRUE; 3813 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3814 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 B->nooffprocentries = nooffprocentries; 3816 3817 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3818 PetscFunctionReturn(0); 3819 } 3820 3821 /*@ 3822 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3823 (the default parallel PETSc format). 3824 3825 Collective 3826 3827 Input Parameters: 3828 + B - the matrix 3829 . i - the indices into j for the start of each local row (starts with zero) 3830 . j - the column indices for each local row (starts with zero) 3831 - v - optional values in the matrix 3832 3833 Level: developer 3834 3835 Notes: 3836 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3837 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3838 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3839 3840 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3841 3842 The format which is used for the sparse matrix input, is equivalent to a 3843 row-major ordering.. i.e for the following matrix, the input data expected is 3844 as shown 3845 3846 $ 1 0 0 3847 $ 2 0 3 P0 3848 $ ------- 3849 $ 4 5 6 P1 3850 $ 3851 $ Process0 [P0]: rows_owned=[0,1] 3852 $ i = {0,1,3} [size = nrow+1 = 2+1] 3853 $ j = {0,0,2} [size = 3] 3854 $ v = {1,2,3} [size = 3] 3855 $ 3856 $ Process1 [P1]: rows_owned=[2] 3857 $ i = {0,3} [size = nrow+1 = 1+1] 3858 $ j = {0,1,2} [size = 3] 3859 $ v = {4,5,6} [size = 3] 3860 3861 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3862 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3863 @*/ 3864 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3865 { 3866 PetscErrorCode ierr; 3867 3868 PetscFunctionBegin; 3869 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3870 PetscFunctionReturn(0); 3871 } 3872 3873 /*@C 3874 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3875 (the default parallel PETSc format). For good matrix assembly performance 3876 the user should preallocate the matrix storage by setting the parameters 3877 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3878 performance can be increased by more than a factor of 50. 3879 3880 Collective 3881 3882 Input Parameters: 3883 + B - the matrix 3884 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3885 (same value is used for all local rows) 3886 . d_nnz - array containing the number of nonzeros in the various rows of the 3887 DIAGONAL portion of the local submatrix (possibly different for each row) 3888 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3889 The size of this array is equal to the number of local rows, i.e 'm'. 3890 For matrices that will be factored, you must leave room for (and set) 3891 the diagonal entry even if it is zero. 3892 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the OFF-DIAGONAL portion of the local submatrix (possibly different for each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero structure. The size of this array is equal to the number of local rows, i.e. 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or compressed row storage (CSR)) is fully compatible with standard Fortran 77 storage. The stored row and column indices begin with zero. See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to process 0, the next m1 rows belong to process 1, the next m2 rows belong to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined as the submatrix which is obtained by extracting the part corresponding to the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the first row that belongs to the processor, r2 is the last row belonging to this processor, and c1-c2 is the range of indices of the local part of a vector suitable for applying the matrix to. This is an mxn matrix. In the common case of a square matrix, the row and column ranges are the same and the DIAGONAL part is also square. The remaining portion of the local submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was; for example the fields mallocs, nz_allocated, nz_used, nz_unneeded. You can also run with the option -info and look for messages with the string malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is assembled across 3 processors. Let's assume that proc0 owns 3 rows, proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
 Proc0      0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
 Proc1      0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
 Proc2     25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL part as SeqAIJ matrices.
For example, proc1 will store [E] as a SeqAIJ matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are allocated for every row of the local diagonal submatrix, and o_nz storage locations are allocated for every row of the OFF-DIAGONAL submatrix. One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 for proc2, i.e. we are using 12+15+10=37 storage locations to store 34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices into the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering, i.e. for the following matrix, the input data expected is as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4098 - v - matrix values 4099 4100 Level: intermediate 4101 4102 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4103 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4104 @*/ 4105 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4106 { 4107 PetscErrorCode ierr; 4108 PetscInt cstart,nnz,i,j; 4109 PetscInt *ld; 4110 PetscBool nooffprocentries; 4111 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4112 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4113 PetscScalar *ad = Ad->a, *ao = Ao->a; 4114 const PetscInt *Adi = Ad->i; 4115 PetscInt ldi,Iii,md; 4116 4117 PetscFunctionBegin; 4118 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4119 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4120 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4121 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4122 4123 cstart = mat->cmap->rstart; 4124 if (!Aij->ld) { 4125 /* count number of entries below block diagonal */ 4126 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4127 Aij->ld = ld; 4128 for (i=0; i<m; i++) { 4129 nnz = Ii[i+1]- Ii[i]; 4130 j = 0; 4131 while (J[j] < cstart && j < nnz) {j++;} 4132 J += nnz; 4133 ld[i] = j; 4134 } 4135 } else { 4136 ld = Aij->ld; 4137 } 4138 4139 for (i=0; i<m; i++) { 4140 nnz = Ii[i+1]- Ii[i]; 4141 Iii = Ii[i]; 4142 ldi = ld[i]; 4143 md = Adi[i+1]-Adi[i]; 4144 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4145 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4146 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4147 ad += md; 4148 ao += nnz - md; 4149 } 4150 nooffprocentries = mat->nooffprocentries; 4151 mat->nooffprocentries = PETSC_TRUE; 4152 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4153 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4154 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4155 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4157 mat->nooffprocentries = nooffprocentries; 4158 PetscFunctionReturn(0); 4159 } 4160 4161 /*@C 4162 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4163 (the default parallel PETSc format). For good matrix assembly performance 4164 the user should preallocate the matrix storage by setting the parameters 4165 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4166 performance can be increased by more than a factor of 50. 4167 4168 Collective 4169 4170 Input Parameters: 4171 + comm - MPI communicator 4172 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4173 This value should be the same as the local size used in creating the 4174 y vector for the matrix-vector product y = Ax. 4175 . n - This value should be the same as the local size used in creating the 4176 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4177 calculated if N is given) For square matrices n is almost always m. 4178 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the DIAGONAL portion of the local submatrix (possibly different for each row) or NULL, if d_nz is used to specify the nonzero structure. The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the OFF-DIAGONAL portion of the local submatrix (possibly different for each row) or NULL, if o_nz is used to specify the nonzero structure. The size of this array is equal to the number of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), MatXXXXSetPreallocation() paradigm instead of this routine directly. [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor then it must be used on all processors that share the object for that argument.

   The user MUST specify either the local or global matrix dimensions (possibly both).

   The parallel matrix is partitioned across processors such that the first m0 rows belong to process 0, the next m1 rows belong to process 1, the next m2 rows belong to process 2, etc., where m0,m1,m2,... are the input parameter 'm', i.e. each processor stores values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging to the 0th partition, the next n1 columns belonging to the next partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor is the submatrix corresponding to the rows and columns m,n corresponding to the given processor, i.e. the diagonal matrix on process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], etc. The remaining portion of the local submatrix [m x (N-n)] constitutes the OFF-DIAGONAL portion. The example below better illustrates this concept.

   For a square global matrix we define each processor's diagonal portion to be its local rows and the corresponding columns (a square submatrix); each processor's off-diagonal portion encompasses the remainder of the local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible. We search for consecutive rows with the same nonzero structure, thereby reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is assembled across 3 processors. Let's assume that proc0 owns 3 rows, proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
 Proc0      0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
 Proc1      0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
 Proc2     25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are allocated for every row of the local diagonal submatrix, and o_nz storage locations are allocated for every row of the OFF-DIAGONAL submatrix. One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 for proc2, i.e. we are using 12+15+10=37 storage locations to store 34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and hence the preallocation is perfect.
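   As an illustration only (this sketch is not taken from a PETSc example; PETSC_COMM_WORLD is assumed for the communicator, each process passes its own local sizes and arrays, and error checking is omitted), process 0 in the example above could make the collective call as
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};   /* the process-0 values from the example above */
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   with processes 1 and 2 making the same call using their own m, n, d_nnz and o_nnz values; the *_nz arguments are ignored here because the *_nnz arrays are provided.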
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.  A - The MPIAIJ matrix

   Output Parameters:
+  Ad - The local diagonal block as a SeqAIJ matrix
.  Ao - The local off-diagonal block as a SeqAIJ matrix
-  colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
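
   For example, a caller could translate each entry of Ao to its global column index as follows
   (a sketch only, with error checking omitted; gcol is the global column of the j-th entry in local row i):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap,*cols;
     PetscInt       i,j,ncols,nr,gcol;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     MatGetSize(Ao,&nr,NULL);
     for (i=0; i<nr; i++) {
       MatGetRow(Ao,i,&ncols,&cols,NULL);
       for (j=0; j<ncols; j++) {
         gcol = colmap[cols[j]];
         PetscPrintf(PETSC_COMM_SELF,"local row %D has an off-diagonal entry in global column %D\n",i,gcol);
       }
       MatRestoreRow(Ao,i,&ncols,&cols,NULL);
     }
.ve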
4372 4373 Level: intermediate 4374 4375 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4376 @*/ 4377 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4378 { 4379 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4380 PetscBool flg; 4381 PetscErrorCode ierr; 4382 4383 PetscFunctionBegin; 4384 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4385 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4386 if (Ad) *Ad = a->A; 4387 if (Ao) *Ao = a->B; 4388 if (colmap) *colmap = a->garray; 4389 PetscFunctionReturn(0); 4390 } 4391 4392 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4393 { 4394 PetscErrorCode ierr; 4395 PetscInt m,N,i,rstart,nnz,Ii; 4396 PetscInt *indx; 4397 PetscScalar *values; 4398 4399 PetscFunctionBegin; 4400 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4401 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4402 PetscInt *dnz,*onz,sum,bs,cbs; 4403 4404 if (n == PETSC_DECIDE) { 4405 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4406 } 4407 /* Check sum(n) = N */ 4408 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4409 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4410 4411 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4412 rstart -= m; 4413 4414 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4415 for (i=0; i<m; i++) { 4416 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4417 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4418 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4419 } 4420 4421 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4422 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4423 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4424 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4425 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4426 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4427 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4428 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4429 } 4430 4431 /* numeric phase */ 4432 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4433 for (i=0; i<m; i++) { 4434 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4435 Ii = i + rstart; 4436 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4437 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4438 } 4439 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 PetscFunctionReturn(0); 4442 } 4443 4444 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4445 { 4446 PetscErrorCode ierr; 4447 PetscMPIInt rank; 4448 PetscInt m,N,i,rstart,nnz; 4449 size_t len; 4450 const PetscInt *indx; 4451 PetscViewer out; 4452 char *name; 4453 Mat B; 4454 const PetscScalar *values; 4455 4456 PetscFunctionBegin; 4457 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4458 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4459 /* Should this be the type of the diagonal block of A? 
*/ 4460 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4461 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4462 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4463 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4464 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4465 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4466 for (i=0; i<m; i++) { 4467 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4468 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4469 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4470 } 4471 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 4474 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4475 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4476 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4477 sprintf(name,"%s.%d",outfile,rank); 4478 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4479 ierr = PetscFree(name);CHKERRQ(ierr); 4480 ierr = MatView(B,out);CHKERRQ(ierr); 4481 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4482 ierr = MatDestroy(&B);CHKERRQ(ierr); 4483 PetscFunctionReturn(0); 4484 } 4485 4486 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4487 { 4488 PetscErrorCode ierr; 4489 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4490 4491 PetscFunctionBegin; 4492 if (!merge) PetscFunctionReturn(0); 4493 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4505 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4506 ierr = PetscFree(merge);CHKERRQ(ierr); 4507 PetscFunctionReturn(0); 4508 } 4509 4510 #include <../src/mat/utils/freespace.h> 4511 #include <petscbt.h> 4512 4513 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4514 { 4515 PetscErrorCode ierr; 4516 MPI_Comm comm; 4517 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4518 PetscMPIInt size,rank,taga,*len_s; 4519 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4520 PetscInt proc,m; 4521 PetscInt **buf_ri,**buf_rj; 4522 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4523 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4524 MPI_Request *s_waits,*r_waits; 4525 MPI_Status *status; 4526 MatScalar *aa=a->a; 4527 MatScalar **abuf_r,*ba_i; 4528 Mat_Merge_SeqsToMPI *merge; 4529 PetscContainer container; 4530 4531 PetscFunctionBegin; 4532 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4533 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4534 4535 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4536 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4537 4538 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4539 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4540 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4541 4542 bi = merge->bi; 4543 bj = merge->bj; 4544 buf_ri = merge->buf_ri; 4545 buf_rj = merge->buf_rj; 4546 4547 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4548 owners = merge->rowmap->range; 4549 len_s = merge->len_s; 4550 4551 /* send and recv matrix values */ 4552 /*-----------------------------*/ 4553 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4554 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4555 4556 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4557 for (proc=0,k=0; proc<size; proc++) { 4558 if (!len_s[proc]) continue; 4559 i = owners[proc]; 4560 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4561 k++; 4562 } 4563 4564 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4565 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4566 ierr = PetscFree(status);CHKERRQ(ierr); 4567 4568 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4569 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4570 4571 /* insert mat values of mpimat */ 4572 /*----------------------------*/ 4573 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4574 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4575 4576 for (k=0; k<merge->nrecv; k++) { 4577 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4578 nrows = *(buf_ri_k[k]); 4579 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4580 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4581 } 4582 4583 /* set values of ba */ 4584 m = merge->rowmap->n; 4585 for (i=0; i<m; i++) { 4586 arow = owners[rank] + i; 4587 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4588 bnzi = bi[i+1] - bi[i]; 4589 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4590 4591 /* add local non-zero vals of this proc's seqmat into ba */ 4592 anzi = ai[arow+1] - ai[arow]; 4593 aj = a->j + ai[arow]; 4594 aa = a->a + ai[arow]; 4595 nextaj = 0; 4596 for (j=0; nextaj<anzi; j++) { 4597 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4598 ba_i[j] += aa[nextaj++]; 4599 } 4600 } 4601 4602 /* add received vals into ba */ 4603 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4604 /* i-th row */ 4605 if (i == *nextrow[k]) { 4606 anzi = *(nextai[k]+1) - *nextai[k]; 4607 aj = buf_rj[k] + *(nextai[k]); 4608 aa = abuf_r[k] + *(nextai[k]); 4609 nextaj = 0; 4610 for (j=0; nextaj<anzi; j++) { 4611 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4612 ba_i[j] += aa[nextaj++]; 4613 } 4614 } 4615 nextrow[k]++; nextai[k]++; 4616 } 4617 } 4618 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4619 } 4620 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4622 4623 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4624 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4625 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4626 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4627 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4628 PetscFunctionReturn(0); 4629 } 4630 4631 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4632 { 4633 PetscErrorCode ierr; 4634 Mat B_mpi; 4635 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4636 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4637 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4638 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4639 PetscInt len,proc,*dnz,*onz,bs,cbs; 4640 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4641 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4642 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4643 MPI_Status *status; 4644 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4645 PetscBT lnkbt; 4646 Mat_Merge_SeqsToMPI *merge; 4647 PetscContainer container; 4648 4649 PetscFunctionBegin; 4650 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4651 4652 /* make sure it is a PETSc comm */ 4653 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4654 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4656 4657 ierr = PetscNew(&merge);CHKERRQ(ierr); 4658 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4659 4660 /* determine row ownership */ 4661 /*---------------------------------------------------------*/ 4662 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4668 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4669 4670 m = merge->rowmap->n; 4671 owners = merge->rowmap->range; 4672 4673 /* determine the number of messages to send, their lengths */ 4674 /*---------------------------------------------------------*/ 4675 len_s = merge->len_s; 4676 4677 len = 0; /* length of buf_si[] */ 4678 merge->nsend = 0; 4679 for (proc=0; proc<size; proc++) { 4680 len_si[proc] = 0; 4681 if (proc == rank) { 4682 len_s[proc] = 0; 4683 } else { 4684 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4685 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4686 } 4687 if (len_s[proc]) { 4688 merge->nsend++; 4689 nrows = 0; 4690 for (i=owners[proc]; i<owners[proc+1]; i++) { 4691 if (ai[i+1] > ai[i]) nrows++; 4692 } 4693 len_si[proc] = 2*(nrows+1); 4694 len += len_si[proc]; 4695 } 4696 } 4697 4698 /* determine the number and length of messages to receive for ij-structure */ 4699 /*-------------------------------------------------------------------------*/ 4700 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4701 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4702 4703 /* post the Irecv of j-structure */ 4704 /*-------------------------------*/ 4705 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4706 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4707 4708 /* post the Isend of j-structure */ 4709 /*--------------------------------*/ 4710 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4711 4712 for (proc=0, k=0; proc<size; proc++) { 4713 if (!len_s[proc]) continue; 4714 i = owners[proc]; 4715 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4716 k++; 4717 } 4718 4719 /* receives and sends of j-structure are complete */ 4720 /*------------------------------------------------*/ 
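  /* After the Waitall calls below, buf_rj[k] holds the concatenated column indices (j-structure)
     of the rows that the k-th sending process contributes to this process's owned row range;
     the i-structure exchanged next records how those indices are split into rows. */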
4721 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4722 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4723 4724 /* send and recv i-structure */ 4725 /*---------------------------*/ 4726 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4727 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4728 4729 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4730 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4731 for (proc=0,k=0; proc<size; proc++) { 4732 if (!len_s[proc]) continue; 4733 /* form outgoing message for i-structure: 4734 buf_si[0]: nrows to be sent 4735 [1:nrows]: row index (global) 4736 [nrows+1:2*nrows+1]: i-structure index 4737 */ 4738 /*-------------------------------------------*/ 4739 nrows = len_si[proc]/2 - 1; 4740 buf_si_i = buf_si + nrows+1; 4741 buf_si[0] = nrows; 4742 buf_si_i[0] = 0; 4743 nrows = 0; 4744 for (i=owners[proc]; i<owners[proc+1]; i++) { 4745 anzi = ai[i+1] - ai[i]; 4746 if (anzi) { 4747 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4748 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4749 nrows++; 4750 } 4751 } 4752 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4753 k++; 4754 buf_si += len_si[proc]; 4755 } 4756 4757 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4758 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4759 4760 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4761 for (i=0; i<merge->nrecv; i++) { 4762 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4763 } 4764 4765 ierr = PetscFree(len_si);CHKERRQ(ierr); 4766 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4767 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4768 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4769 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4770 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4771 ierr = PetscFree(status);CHKERRQ(ierr); 4772 4773 /* compute a local seq matrix in each processor */ 4774 /*----------------------------------------------*/ 4775 /* allocate bi array and free space for accumulating nonzero column info */ 4776 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4777 bi[0] = 0; 4778 4779 /* create and initialize a linked list */ 4780 nlnk = N+1; 4781 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4782 4783 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4784 len = ai[owners[rank+1]] - ai[owners[rank]]; 4785 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4786 4787 current_space = free_space; 4788 4789 /* determine symbolic info for each local row */ 4790 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4791 4792 for (k=0; k<merge->nrecv; k++) { 4793 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4794 nrows = *buf_ri_k[k]; 4795 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4796 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4797 } 4798 4799 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4800 len = 0; 4801 for (i=0; i<m; i++) { 4802 bnzi = 0; 4803 /* add local non-zero cols of this proc's seqmat into lnk */ 4804 arow = owners[rank] + i; 4805 anzi = 
ai[arow+1] - ai[arow]; 4806 aj = a->j + ai[arow]; 4807 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4808 bnzi += nlnk; 4809 /* add received col data into lnk */ 4810 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4811 if (i == *nextrow[k]) { /* i-th row */ 4812 anzi = *(nextai[k]+1) - *nextai[k]; 4813 aj = buf_rj[k] + *nextai[k]; 4814 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4815 bnzi += nlnk; 4816 nextrow[k]++; nextai[k]++; 4817 } 4818 } 4819 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4820 4821 /* if free space is not available, make more free space */ 4822 if (current_space->local_remaining<bnzi) { 4823 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4824 nspacedouble++; 4825 } 4826 /* copy data into free space, then initialize lnk */ 4827 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4828 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4829 4830 current_space->array += bnzi; 4831 current_space->local_used += bnzi; 4832 current_space->local_remaining -= bnzi; 4833 4834 bi[i+1] = bi[i] + bnzi; 4835 } 4836 4837 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4838 4839 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4840 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4841 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4842 4843 /* create symbolic parallel matrix B_mpi */ 4844 /*---------------------------------------*/ 4845 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4846 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4847 if (n==PETSC_DECIDE) { 4848 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4849 } else { 4850 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4851 } 4852 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4853 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4854 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4855 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4856 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4857 4858 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4859 B_mpi->assembled = PETSC_FALSE; 4860 merge->bi = bi; 4861 merge->bj = bj; 4862 merge->buf_ri = buf_ri; 4863 merge->buf_rj = buf_rj; 4864 merge->coi = NULL; 4865 merge->coj = NULL; 4866 merge->owners_co = NULL; 4867 4868 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4869 4870 /* attach the supporting struct to B_mpi for reuse */ 4871 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4872 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4873 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4874 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4875 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4876 *mpimat = B_mpi; 4877 4878 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4879 PetscFunctionReturn(0); 4880 } 4881 4882 /*@C 4883 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4884 matrices from each processor 4885 4886 Collective 4887 4888 Input Parameters: 4889 + comm - the communicators the parallel matrix will live on 4890 . seqmat - the input sequential matrices 4891 . m - number of local rows (or PETSC_DECIDE) 4892 . 
n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them
   into a sequential matrix with mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize()
   and n is the global column count obtained with MatGetSize()

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  A_loc - the local sequential matrix generated

   Level: developer

   Notes:
   When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
   If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
   This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
   modify the values of the returned A_loc.
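
   A typical calling sequence (a sketch, with error checking omitted) is
.vb
     Mat A_loc = NULL;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve
   where the second call refreshes the values of A_loc after the entries of A have been changed
   without altering its nonzero pattern.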
4952 4953 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4954 4955 @*/ 4956 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4957 { 4958 PetscErrorCode ierr; 4959 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4960 Mat_SeqAIJ *mat,*a,*b; 4961 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4962 MatScalar *aa,*ba,*cam; 4963 PetscScalar *ca; 4964 PetscMPIInt size; 4965 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4966 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4967 PetscBool match; 4968 4969 PetscFunctionBegin; 4970 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4971 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4972 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4973 if (size == 1) { 4974 if (scall == MAT_INITIAL_MATRIX) { 4975 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4976 *A_loc = mpimat->A; 4977 } else if (scall == MAT_REUSE_MATRIX) { 4978 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4979 } 4980 PetscFunctionReturn(0); 4981 } 4982 4983 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4984 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4985 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4986 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4987 aa = a->a; ba = b->a; 4988 if (scall == MAT_INITIAL_MATRIX) { 4989 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4990 ci[0] = 0; 4991 for (i=0; i<am; i++) { 4992 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4993 } 4994 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4995 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4996 k = 0; 4997 for (i=0; i<am; i++) { 4998 ncols_o = bi[i+1] - bi[i]; 4999 ncols_d = ai[i+1] - ai[i]; 5000 /* off-diagonal portion of A */ 5001 for (jo=0; jo<ncols_o; jo++) { 5002 col = cmap[*bj]; 5003 if (col >= cstart) break; 5004 cj[k] = col; bj++; 5005 ca[k++] = *ba++; 5006 } 5007 /* diagonal portion of A */ 5008 for (j=0; j<ncols_d; j++) { 5009 cj[k] = cstart + *aj++; 5010 ca[k++] = *aa++; 5011 } 5012 /* off-diagonal portion of A */ 5013 for (j=jo; j<ncols_o; j++) { 5014 cj[k] = cmap[*bj++]; 5015 ca[k++] = *ba++; 5016 } 5017 } 5018 /* put together the new matrix */ 5019 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5020 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5021 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5022 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5023 mat->free_a = PETSC_TRUE; 5024 mat->free_ij = PETSC_TRUE; 5025 mat->nonew = 0; 5026 } else if (scall == MAT_REUSE_MATRIX) { 5027 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5028 ci = mat->i; cj = mat->j; cam = mat->a; 5029 for (i=0; i<am; i++) { 5030 /* off-diagonal portion of A */ 5031 ncols_o = bi[i+1] - bi[i]; 5032 for (jo=0; jo<ncols_o; jo++) { 5033 col = cmap[*bj]; 5034 if (col >= cstart) break; 5035 *cam++ = *ba++; bj++; 5036 } 5037 /* diagonal portion of A */ 5038 ncols_d = ai[i+1] - ai[i]; 5039 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5040 /* off-diagonal portion of A */ 5041 for (j=jo; j<ncols_o; j++) { 5042 *cam++ = *ba++; bj++; 5043 } 5044 } 5045 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5046 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5047 PetscFunctionReturn(0); 5048 } 5049 5050 /*@C 5051 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5052 5053 Not Collective 5054 5055 Input Parameters: 5056 + A - the matrix 5057 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5058 - row, col - index sets of rows and columns to extract (or NULL) 5059 5060 Output Parameter: 5061 . A_loc - the local sequential matrix generated 5062 5063 Level: developer 5064 5065 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5066 5067 @*/ 5068 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5069 { 5070 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5071 PetscErrorCode ierr; 5072 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5073 IS isrowa,iscola; 5074 Mat *aloc; 5075 PetscBool match; 5076 5077 PetscFunctionBegin; 5078 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5079 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5080 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5081 if (!row) { 5082 start = A->rmap->rstart; end = A->rmap->rend; 5083 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5084 } else { 5085 isrowa = *row; 5086 } 5087 if (!col) { 5088 start = A->cmap->rstart; 5089 cmap = a->garray; 5090 nzA = a->A->cmap->n; 5091 nzB = a->B->cmap->n; 5092 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5093 ncols = 0; 5094 for (i=0; i<nzB; i++) { 5095 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5096 else break; 5097 } 5098 imark = i; 5099 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5100 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5101 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5102 } else { 5103 iscola = *col; 5104 } 5105 if (scall != MAT_INITIAL_MATRIX) { 5106 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5107 aloc[0] = *A_loc; 5108 } 5109 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5110 if (!col) { /* attach global id of condensed columns */ 5111 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5112 } 5113 *A_loc = aloc[0]; 5114 ierr = PetscFree(aloc);CHKERRQ(ierr); 5115 if (!row) { 5116 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5117 } 5118 if (!col) { 5119 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5120 } 5121 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 5125 /* 5126 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5127 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5128 * on a global size. 5129 * */ 5130 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5131 { 5132 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5133 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5134 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5135 PetscMPIInt owner; 5136 PetscSFNode *iremote,*oiremote; 5137 const PetscInt *lrowindices; 5138 PetscErrorCode ierr; 5139 PetscSF sf,osf; 5140 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5141 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5142 MPI_Comm comm; 5143 ISLocalToGlobalMapping mapping; 5144 5145 PetscFunctionBegin; 5146 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5147 /* plocalsize is the number of roots 5148 * nrows is the number of leaves 5149 * */ 5150 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5151 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5152 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5153 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5154 for (i=0;i<nrows;i++) { 5155 /* Find a remote index and an owner for a row 5156 * The row could be local or remote 5157 * */ 5158 owner = 0; 5159 lidx = 0; 5160 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5161 iremote[i].index = lidx; 5162 iremote[i].rank = owner; 5163 } 5164 /* Create SF to communicate how many nonzero columns for each row */ 5165 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5166 /* SF will figure out the number of nonzero colunms for each row, and their 5167 * offsets 5168 * */ 5169 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5170 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5171 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5172 5173 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5174 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5175 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5176 roffsets[0] = 0; 5177 roffsets[1] = 0; 5178 for (i=0;i<plocalsize;i++) { 5179 /* diag */ 5180 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5181 /* off diag */ 5182 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5183 /* compute offsets so that we relative location for each row */ 5184 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5185 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5186 } 5187 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5188 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5189 /* 'r' means root, and 'l' means leaf */ 5190 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5191 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5192 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5193 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5194 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5195 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5196 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5197 dntotalcols = 0; 5198 ontotalcols = 0; 5199 ncol = 0; 5200 for (i=0;i<nrows;i++) { 5201 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5202 ncol = PetscMax(pnnz[i],ncol); 5203 /* diag */ 5204 dntotalcols += nlcols[i*2+0]; 5205 /* off diag */ 5206 ontotalcols += nlcols[i*2+1]; 5207 } 5208 /* We do not need to figure the right number of columns 5209 * since all the 
calculations will be done by going through the raw data 5210 * */ 5211 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5212 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5213 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5214 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5215 /* diag */ 5216 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5217 /* off diag */ 5218 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5219 /* diag */ 5220 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5221 /* off diag */ 5222 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5223 dntotalcols = 0; 5224 ontotalcols = 0; 5225 ntotalcols = 0; 5226 for (i=0;i<nrows;i++) { 5227 owner = 0; 5228 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5229 /* Set iremote for diag matrix */ 5230 for (j=0;j<nlcols[i*2+0];j++) { 5231 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5232 iremote[dntotalcols].rank = owner; 5233 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5234 ilocal[dntotalcols++] = ntotalcols++; 5235 } 5236 /* off diag */ 5237 for (j=0;j<nlcols[i*2+1];j++) { 5238 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5239 oiremote[ontotalcols].rank = owner; 5240 oilocal[ontotalcols++] = ntotalcols++; 5241 } 5242 } 5243 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5244 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5245 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5246 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5247 /* P serves as roots and P_oth is leaves 5248 * Diag matrix 5249 * */ 5250 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5251 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5252 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5253 5254 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5255 /* Off diag */ 5256 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5257 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5258 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5259 /* We operate on the matrix internal data for saving memory */ 5260 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5261 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5262 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5263 /* Convert to global indices for diag matrix */ 5264 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5265 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5266 /* We want P_oth store global indices */ 5267 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5268 /* Use memory scalable approach */ 5269 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5270 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5271 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5272 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5273 /* Convert back to local indices */ 5274 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5275 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5276 nout = 0; 5277 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5278 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5279 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5280 /* Exchange values */ 5281 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5282 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5283 /* Stop PETSc from shrinking memory */ 5284 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5285 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5286 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5287 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5288 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5289 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5290 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5291 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5292 PetscFunctionReturn(0); 5293 } 5294 5295 /* 5296 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5297 * This supports MPIAIJ and MAIJ 5298 * */ 5299 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5300 { 5301 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5302 Mat_SeqAIJ *p_oth; 5303 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5304 IS rows,map; 5305 PetscHMapI hamp; 5306 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5307 MPI_Comm comm; 5308 PetscSF sf,osf; 5309 PetscBool has; 5310 PetscErrorCode ierr; 5311 5312 PetscFunctionBegin; 5313 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5314 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5315 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5316 * and then create a submatrix (that often is an overlapping matrix) 5317 * */ 5318 if (reuse == MAT_INITIAL_MATRIX) { 5319 /* Use a hash table to figure out unique keys */ 5320 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5321 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5322 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5323 count = 0; 5324 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5325 for (i=0;i<a->B->cmap->n;i++) { 5326 key = a->garray[i]/dof; 5327 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5328 if (!has) { 5329 mapping[i] = count; 5330 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5331 } else { 5332 /* Current 'i' has the same value the previous step */ 5333 mapping[i] = count-1; 5334 } 5335 } 5336 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5337 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5338 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5339 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5340 off = 0; 5341 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5342 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5343 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5344 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5345 /* In case, the matrix was already created but users want to recreate the matrix */ 5346 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5347 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5348 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5349 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5350 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5351 } else if (reuse == MAT_REUSE_MATRIX) { 5352 /* If matrix was already created, we simply update values using SF objects 5353 * that as attached to the matrix ealier. 5354 * */ 5355 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5356 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5357 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5358 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5359 /* Update values in place */ 5360 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5361 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5362 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5363 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5364 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5365 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5366 PetscFunctionReturn(0); 5367 } 5368 5369 /*@C 5370 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5371 5372 Collective on Mat 5373 5374 Input Parameters: 5375 + A,B - the matrices in mpiaij format 5376 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5377 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5378 5379 Output Parameter: 5380 + rowb, colb - index sets of rows and columns of B to extract 5381 - B_seq - the sequential matrix generated 5382 5383 Level: developer 5384 5385 @*/ 5386 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5387 { 5388 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5389 PetscErrorCode ierr; 5390 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5391 IS isrowb,iscolb; 5392 Mat *bseq=NULL; 5393 5394 PetscFunctionBegin; 5395 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5396 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5397 } 5398 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5399 5400 if (scall == MAT_INITIAL_MATRIX) { 5401 start = A->cmap->rstart; 5402 cmap = a->garray; 5403 nzA = a->A->cmap->n; 5404 nzB = a->B->cmap->n; 5405 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5406 ncols = 0; 5407 for (i=0; i<nzB; i++) { /* row < local row index */ 5408 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5409 else break; 5410 } 5411 imark = i; 5412 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5413 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5414 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5415 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5416 } else { 5417 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5418 isrowb = *rowb; iscolb = *colb; 5419 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5420 bseq[0] = *B_seq; 5421 } 5422 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5423 *B_seq = bseq[0]; 5424 ierr = PetscFree(bseq);CHKERRQ(ierr); 5425 if (!rowb) { 5426 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5427 } else { 5428 *rowb = isrowb; 5429 } 5430 if (!colb) { 5431 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5432 } 
else { 5433 *colb = iscolb; 5434 } 5435 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5436 PetscFunctionReturn(0); 5437 } 5438 5439 /* 5440 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5441 of the OFF-DIAGONAL portion of local A 5442 5443 Collective on Mat 5444 5445 Input Parameters: 5446 + A,B - the matrices in mpiaij format 5447 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5448 5449 Output Parameter: 5450 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5451 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5452 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5453 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5454 5455 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5456 for this matrix. This is not desirable.. 5457 5458 Level: developer 5459 5460 */ 5461 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5462 { 5463 PetscErrorCode ierr; 5464 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5465 Mat_SeqAIJ *b_oth; 5466 VecScatter ctx; 5467 MPI_Comm comm; 5468 const PetscMPIInt *rprocs,*sprocs; 5469 const PetscInt *srow,*rstarts,*sstarts; 5470 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5471 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5472 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5473 MPI_Request *rwaits = NULL,*swaits = NULL; 5474 MPI_Status rstatus; 5475 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5476 5477 PetscFunctionBegin; 5478 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5479 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5480 5481 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5482 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5483 } 5484 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5485 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5486 5487 if (size == 1) { 5488 startsj_s = NULL; 5489 bufa_ptr = NULL; 5490 *B_oth = NULL; 5491 PetscFunctionReturn(0); 5492 } 5493 5494 ctx = a->Mvctx; 5495 tag = ((PetscObject)ctx)->tag; 5496 5497 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5498 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5499 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5500 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5501 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5502 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5503 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5504 5505 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5506 if (scall == MAT_INITIAL_MATRIX) { 5507 /* i-array */ 5508 /*---------*/ 5509 /* post receives */ 5510 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL 
when nrecvs=0 */ 5511 for (i=0; i<nrecvs; i++) { 5512 rowlen = rvalues + rstarts[i]*rbs; 5513 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5514 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5515 } 5516 5517 /* pack the outgoing message */ 5518 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5519 5520 sstartsj[0] = 0; 5521 rstartsj[0] = 0; 5522 len = 0; /* total length of j or a array to be sent */ 5523 if (nsends) { 5524 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5525 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5526 } 5527 for (i=0; i<nsends; i++) { 5528 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5529 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5530 for (j=0; j<nrows; j++) { 5531 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5532 for (l=0; l<sbs; l++) { 5533 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5534 5535 rowlen[j*sbs+l] = ncols; 5536 5537 len += ncols; 5538 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5539 } 5540 k++; 5541 } 5542 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5543 5544 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5545 } 5546 /* recvs and sends of i-array are completed */ 5547 i = nrecvs; 5548 while (i--) { 5549 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5550 } 5551 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5552 ierr = PetscFree(svalues);CHKERRQ(ierr); 5553 5554 /* allocate buffers for sending j and a arrays */ 5555 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5556 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5557 5558 /* create i-array of B_oth */ 5559 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5560 5561 b_othi[0] = 0; 5562 len = 0; /* total length of j or a array to be received */ 5563 k = 0; 5564 for (i=0; i<nrecvs; i++) { 5565 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5566 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5567 for (j=0; j<nrows; j++) { 5568 b_othi[k+1] = b_othi[k] + rowlen[j]; 5569 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5570 k++; 5571 } 5572 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5573 } 5574 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5575 5576 /* allocate space for j and a arrrays of B_oth */ 5577 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5578 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5579 5580 /* j-array */ 5581 /*---------*/ 5582 /* post receives of j-array */ 5583 for (i=0; i<nrecvs; i++) { 5584 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5585 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5586 } 5587 5588 /* pack the outgoing message j-array */ 5589 if (nsends) k = sstarts[0]; 5590 for (i=0; i<nsends; i++) { 5591 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5592 bufJ = bufj+sstartsj[i]; 5593 for (j=0; j<nrows; j++) { 5594 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5595 for (ll=0; ll<sbs; ll++) { 5596 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5597 for (l=0; l<ncols; l++) { 5598 *bufJ++ = cols[l]; 5599 } 5600 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5601 } 
5602 } 5603 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5604 } 5605 5606 /* recvs and sends of j-array are completed */ 5607 i = nrecvs; 5608 while (i--) { 5609 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5610 } 5611 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5612 } else if (scall == MAT_REUSE_MATRIX) { 5613 sstartsj = *startsj_s; 5614 rstartsj = *startsj_r; 5615 bufa = *bufa_ptr; 5616 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5617 b_otha = b_oth->a; 5618 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5619 5620 /* a-array */ 5621 /*---------*/ 5622 /* post receives of a-array */ 5623 for (i=0; i<nrecvs; i++) { 5624 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5625 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5626 } 5627 5628 /* pack the outgoing message a-array */ 5629 if (nsends) k = sstarts[0]; 5630 for (i=0; i<nsends; i++) { 5631 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5632 bufA = bufa+sstartsj[i]; 5633 for (j=0; j<nrows; j++) { 5634 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5635 for (ll=0; ll<sbs; ll++) { 5636 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5637 for (l=0; l<ncols; l++) { 5638 *bufA++ = vals[l]; 5639 } 5640 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5641 } 5642 } 5643 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5644 } 5645 /* recvs and sends of a-array are completed */ 5646 i = nrecvs; 5647 while (i--) { 5648 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5649 } 5650 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5651 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5652 5653 if (scall == MAT_INITIAL_MATRIX) { 5654 /* put together the new matrix */ 5655 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5656 5657 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5658 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5659 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5660 b_oth->free_a = PETSC_TRUE; 5661 b_oth->free_ij = PETSC_TRUE; 5662 b_oth->nonew = 0; 5663 5664 ierr = PetscFree(bufj);CHKERRQ(ierr); 5665 if (!startsj_s || !bufa_ptr) { 5666 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5667 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5668 } else { 5669 *startsj_s = sstartsj; 5670 *startsj_r = rstartsj; 5671 *bufa_ptr = bufa; 5672 } 5673 } 5674 5675 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5676 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5677 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5678 PetscFunctionReturn(0); 5679 } 5680 5681 /*@C 5682 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5683 5684 Not Collective 5685 5686 Input Parameters: 5687 . A - The matrix in mpiaij format 5688 5689 Output Parameter: 5690 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5691 . 
colmap - A map from global column index to local index into lvec 5692 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5693 5694 Level: developer 5695 5696 @*/ 5697 #if defined(PETSC_USE_CTABLE) 5698 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5699 #else 5700 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5701 #endif 5702 { 5703 Mat_MPIAIJ *a; 5704 5705 PetscFunctionBegin; 5706 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5707 PetscValidPointer(lvec, 2); 5708 PetscValidPointer(colmap, 3); 5709 PetscValidPointer(multScatter, 4); 5710 a = (Mat_MPIAIJ*) A->data; 5711 if (lvec) *lvec = a->lvec; 5712 if (colmap) *colmap = a->colmap; 5713 if (multScatter) *multScatter = a->Mvctx; 5714 PetscFunctionReturn(0); 5715 } 5716 5717 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5718 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5719 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5720 #if defined(PETSC_HAVE_MKL_SPARSE) 5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5722 #endif 5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5724 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5725 #if defined(PETSC_HAVE_ELEMENTAL) 5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5727 #endif 5728 #if defined(PETSC_HAVE_HYPRE) 5729 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5730 #endif 5731 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5732 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5733 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5734 5735 /* 5736 Computes (B'*A')' since computing B*A directly is untenable 5737 5738 n p p 5739 ( ) ( ) ( ) 5740 m ( A ) * n ( B ) = m ( C ) 5741 ( ) ( ) ( ) 5742 5743 */ 5744 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5745 { 5746 PetscErrorCode ierr; 5747 Mat At,Bt,Ct; 5748 5749 PetscFunctionBegin; 5750 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5751 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5752 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5753 ierr = MatDestroy(&At);CHKERRQ(ierr); 5754 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5755 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5756 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5757 PetscFunctionReturn(0); 5758 } 5759 5760 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5761 { 5762 PetscErrorCode ierr; 5763 PetscBool cisdense; 5764 5765 PetscFunctionBegin; 5766 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5767 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5768 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5769 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5770 if (!cisdense) { 5771 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5772 } 5773 ierr = MatSetUp(C);CHKERRQ(ierr); 5774 5775 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5776 
PetscFunctionReturn(0); 5777 } 5778 5779 /* ----------------------------------------------------------------*/ 5780 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5781 { 5782 Mat_Product *product = C->product; 5783 Mat A = product->A,B=product->B; 5784 5785 PetscFunctionBegin; 5786 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5787 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5788 5789 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5790 C->ops->productsymbolic = MatProductSymbolic_AB; 5791 PetscFunctionReturn(0); 5792 } 5793 5794 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5795 { 5796 PetscErrorCode ierr; 5797 Mat_Product *product = C->product; 5798 5799 PetscFunctionBegin; 5800 if (product->type == MATPRODUCT_AB) { 5801 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 5802 } 5803 PetscFunctionReturn(0); 5804 } 5805 /* ----------------------------------------------------------------*/ 5806 5807 /*MC 5808 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5809 5810 Options Database Keys: 5811 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5812 5813 Level: beginner 5814 5815 Notes: 5816 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 5817 in this case the values associated with the rows and columns one passes in are set to zero 5818 in the matrix. 5819 5820 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no 5821 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored. 5822 5823 .seealso: MatCreateAIJ() 5824 M*/ 5825 5826 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5827 { 5828 Mat_MPIAIJ *b; 5829 PetscErrorCode ierr; 5830 PetscMPIInt size; 5831 5832 PetscFunctionBegin; 5833 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5834 5835 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5836 B->data = (void*)b; 5837 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5838 B->assembled = PETSC_FALSE; 5839 B->insertmode = NOT_SET_VALUES; 5840 b->size = size; 5841 5842 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5843 5844 /* build cache for off-process entries generated during MatSetValues() */ 5845 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5846 5847 b->donotstash = PETSC_FALSE; 5848 b->colmap = 0; 5849 b->garray = 0; 5850 b->roworiented = PETSC_TRUE; 5851 5852 /* stuff used for matrix vector multiply */ 5853 b->lvec = NULL; 5854 b->Mvctx = NULL; 5855 5856 /* stuff for MatGetRow() */ 5857 b->rowindices = 0; 5858 b->rowvalues = 0; 5859 b->getrowactive = PETSC_FALSE; 5860 5861 /* flexible pointer used in CUSP/CUSPARSE classes */ 5862 b->spptr = NULL; 5863 5864 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5865 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5866 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5867 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5868
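  /* The compositions above and below attach MPIAIJ-specific implementations to the object under
     string keys; generic interfaces such as MatMPIAIJSetPreallocation() and MatConvert() retrieve
     them with PetscObjectQueryFunction() to dispatch to this type at run time. */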
ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5869 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5870 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5871 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5874 #if defined(PETSC_HAVE_MKL_SPARSE) 5875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5876 #endif 5877 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5878 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 5879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5880 #if defined(PETSC_HAVE_ELEMENTAL) 5881 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5882 #endif 5883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5884 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5885 #if defined(PETSC_HAVE_HYPRE) 5886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5888 #endif 5889 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 5890 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 5891 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5892 PetscFunctionReturn(0); 5893 } 5894 5895 /*@C 5896 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5897 and "off-diagonal" part of the matrix in CSR format. 5898 5899 Collective 5900 5901 Input Parameters: 5902 + comm - MPI communicator 5903 . m - number of local rows (Cannot be PETSC_DECIDE) 5904 . n - This value should be the same as the local size used in creating the 5905 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5906 calculated if N is given) For square matrices n is almost always m. 5907 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5908 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5909 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5910 . j - column indices 5911 . a - matrix values 5912 . 
oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5913 . oj - column indices 5914 - oa - matrix values 5915 5916 Output Parameter: 5917 . mat - the matrix 5918 5919 Level: advanced 5920 5921 Notes: 5922 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5923 must free the arrays once the matrix has been destroyed and not before. 5924 5925 The i and j indices are 0 based 5926 5927 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5928 5929 This sets local rows and cannot be used to set off-processor values. 5930 5931 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5932 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5933 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5934 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5935 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5936 communication if it is known that only local entries will be set. 5937 5938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5939 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5940 @*/ 5941 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5942 { 5943 PetscErrorCode ierr; 5944 Mat_MPIAIJ *maij; 5945 5946 PetscFunctionBegin; 5947 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5948 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5949 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5950 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5951 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5952 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5953 maij = (Mat_MPIAIJ*) (*mat)->data; 5954 5955 (*mat)->preallocated = PETSC_TRUE; 5956 5957 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5958 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5959 5960 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5961 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5962 5963 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5964 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5965 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5966 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5967 5968 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5969 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5970 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5971 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5972 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5973 PetscFunctionReturn(0); 5974 } 5975 5976 /* 5977 Special version for direct calls from Fortran 5978 */ 5979 #include 
<petsc/private/fortranimpl.h> 5980 5981 /* Change these macros so can be used in void function */ 5982 #undef CHKERRQ 5983 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5984 #undef SETERRQ2 5985 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5986 #undef SETERRQ3 5987 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5988 #undef SETERRQ 5989 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5990 5991 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5992 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5994 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5995 #else 5996 #endif 5997 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5998 { 5999 Mat mat = *mmat; 6000 PetscInt m = *mm, n = *mn; 6001 InsertMode addv = *maddv; 6002 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6003 PetscScalar value; 6004 PetscErrorCode ierr; 6005 6006 MatCheckPreallocated(mat,1); 6007 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6008 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6009 { 6010 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6011 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6012 PetscBool roworiented = aij->roworiented; 6013 6014 /* Some Variables required in the macro */ 6015 Mat A = aij->A; 6016 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6017 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6018 MatScalar *aa = a->a; 6019 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6020 Mat B = aij->B; 6021 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6022 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6023 MatScalar *ba = b->a; 6024 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6025 * cannot use "#if defined" inside a macro. 
*/ 6026 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6027 6028 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6029 PetscInt nonew = a->nonew; 6030 MatScalar *ap1,*ap2; 6031 6032 PetscFunctionBegin; 6033 for (i=0; i<m; i++) { 6034 if (im[i] < 0) continue; 6035 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6036 if (im[i] >= rstart && im[i] < rend) { 6037 row = im[i] - rstart; 6038 lastcol1 = -1; 6039 rp1 = aj + ai[row]; 6040 ap1 = aa + ai[row]; 6041 rmax1 = aimax[row]; 6042 nrow1 = ailen[row]; 6043 low1 = 0; 6044 high1 = nrow1; 6045 lastcol2 = -1; 6046 rp2 = bj + bi[row]; 6047 ap2 = ba + bi[row]; 6048 rmax2 = bimax[row]; 6049 nrow2 = bilen[row]; 6050 low2 = 0; 6051 high2 = nrow2; 6052 6053 for (j=0; j<n; j++) { 6054 if (roworiented) value = v[i*n+j]; 6055 else value = v[i+j*m]; 6056 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6057 if (in[j] >= cstart && in[j] < cend) { 6058 col = in[j] - cstart; 6059 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6060 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6061 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6062 #endif 6063 } else if (in[j] < 0) continue; 6064 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6065 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6066 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6067 } else { 6068 if (mat->was_assembled) { 6069 if (!aij->colmap) { 6070 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6071 } 6072 #if defined(PETSC_USE_CTABLE) 6073 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6074 col--; 6075 #else 6076 col = aij->colmap[in[j]] - 1; 6077 #endif 6078 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6079 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6080 col = in[j]; 6081 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6082 B = aij->B; 6083 b = (Mat_SeqAIJ*)B->data; 6084 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6085 rp2 = bj + bi[row]; 6086 ap2 = ba + bi[row]; 6087 rmax2 = bimax[row]; 6088 nrow2 = bilen[row]; 6089 low2 = 0; 6090 high2 = nrow2; 6091 bm = aij->B->rmap->n; 6092 ba = b->a; 6093 inserted = PETSC_FALSE; 6094 } 6095 } else col = in[j]; 6096 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6097 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6098 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6099 #endif 6100 } 6101 } 6102 } else if (!aij->donotstash) { 6103 if (roworiented) { 6104 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6105 } else { 6106 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6107 } 6108 } 6109 } 6110 } 6111 PetscFunctionReturnVoid(); 6112 } 6113
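
/*
   Example use of MatCreateMPIAIJWithSplitArrays(), as an illustrative sketch: the arrays below are
   what rank 0 of a two-process run assembling a 4x4 matrix might pass, with rank 0 owning rows and
   columns 0 and 1. Each rank supplies its own arrays; j[] holds LOCAL column indices of the diagonal
   block, while oj[] holds GLOBAL column indices of the off-diagonal block.

       PetscInt       i[]  = {0,2,3};          row offsets of the diagonal block: row 0 has 2 entries, row 1 has 1
       PetscInt       j[]  = {0,1,1};          local column indices
       PetscScalar    a[]  = {1.0,2.0,3.0};
       PetscInt       oi[] = {0,1,1};          row offsets of the off-diagonal block
       PetscInt       oj[] = {2};              global column indices
       PetscScalar    oa[] = {4.0};
       Mat            A;
       PetscErrorCode ierr;

       ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
       (use A, for example with MatMult())
       ierr = MatDestroy(&A);CHKERRQ(ierr);
       (only after MatDestroy() may the six arrays be freed or go out of scope)

   As the manual page above notes, assembly with MatSetValues() is usually simpler and more flexible.
*/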