#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to using inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
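    /* ld[] was built when the matrix was created with MAT_INITIAL_MATRIX (see above) and stashed in the
       Mat_MPIAIJ ld field: ld[i] is the number of entries of local row i whose global column lies to the
       left of this process's diagonal block (global column < rstart).  The copy below walks the contiguous
       rank-0 CSR values row by row, sending the part left of the diagonal block and the part right of it
       into B (ao) and the diagonal-block part into A (ad). */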
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each processor
  has an order N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
  { \
    if (col <= lastcol1)  low1 = 0; \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be PETSC_FALSE, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 796 797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 803 PetscFunctionBegin; 804 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 805 806 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 807 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 808 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 813 { 814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 816 PetscErrorCode ierr; 817 PetscMPIInt n; 818 PetscInt i,j,rstart,ncols,flg; 819 PetscInt *row,*col; 820 
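  /* Overview of the assembly steps below: drain the off-process stash and insert those values locally with
     MatSetValues_MPIAIJ(); assemble the diagonal block A; determine collectively whether any process
     disassembled its off-diagonal block B (if so, disassemble here as well so that every process can
     reassemble consistently); on the first final assembly set up the off-process communication with
     MatSetUpMultiply_MPIAIJ(); finally assemble B and reduce the global nonzero state. */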
PetscBool other_disassembled; 821 PetscScalar *val; 822 823 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 824 825 PetscFunctionBegin; 826 if (!aij->donotstash && !mat->nooffprocentries) { 827 while (1) { 828 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 829 if (!flg) break; 830 831 for (i=0; i<n; ) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for (j=i,rstart=row[j]; j<n; j++) { 834 if (row[j] != rstart) break; 835 } 836 if (j < n) ncols = j-i; 837 else ncols = n-i; 838 /* Now assemble all these values with a single function call */ 839 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 840 i = j; 841 } 842 } 843 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 844 } 845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 846 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 847 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 848 if (mat->boundtocpu) { 849 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 
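  /* Approach used below: map the requested global rows to their owning processes with a PetscSF and
     reduce them to flags on the locally owned rows; zero those rows and columns of the diagonal block
     with MatZeroRowsColumns() on l->A; then scatter a 0/1 mask of the zeroed rows into the off-process
     column layout and use it to zero the matching columns of the off-diagonal block B, updating the
     right-hand side b from x where both are provided.

     A typical call from user code looks like the following sketch (here A, nrows, rows, x and b are
     the caller's own variables; rows holds global row indices and may differ from process to process):

       ierr = MatZeroRowsColumns(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
  */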
PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= 
*aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 1114 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1115 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1116 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1117 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1118 PetscFunctionReturn(0); 1119 } 1120 1121 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1122 { 1123 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 VecScatter Mvctx = a->Mvctx; 1136 1137 PetscFunctionBegin; 1138 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1139 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1140 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1141 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1142 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1147 { 1148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1149 PetscErrorCode ierr; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1154 /* do local part */ 1155 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1156 /* add partial results together */ 1157 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1159 PetscFunctionReturn(0); 1160 } 1161 1162 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1163 { 1164 MPI_Comm comm; 1165 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1166 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1167 IS Me,Notme; 1168 PetscErrorCode ierr; 1169 PetscInt M,N,first,last,*notme,i; 1170 PetscBool lf; 1171 PetscMPIInt size; 1172 1173 PetscFunctionBegin; 1174 /* Easy test: symmetric diagonal block */ 1175 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1176 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1177 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1178 if (!*f) PetscFunctionReturn(0); 1179 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1180 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1181 if (size == 1) PetscFunctionReturn(0); 1182 1183 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1184 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1185 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1186 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1187 for (i=0; i<first; i++) notme[i] = i; 1188 for (i=last; i<M; i++) notme[i-last+first] = i; 1189 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1190 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1191 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1192 Aoff = Aoffs[0]; 1193 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1194 Boff = Boffs[0]; 1195 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1197 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1199 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1200 ierr = PetscFree(notme);CHKERRQ(ierr); 1201 PetscFunctionReturn(0); 1202 } 1203 1204 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1205 { 1206 PetscErrorCode ierr; 1207 1208 PetscFunctionBegin; 1209 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1214 { 1215 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1216 PetscErrorCode ierr; 1217 1218 PetscFunctionBegin; 1219 /* do nondiagonal part */ 1220 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1221 /* do local part */ 1222 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1223 /* add partial results together */ 1224 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 /* 1230 This only works correctly for square matrices where the subblock A->A is the 1231 diagonal block 1232 */ 1233 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1234 { 1235 PetscErrorCode ierr; 1236 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1237 1238 PetscFunctionBegin; 1239 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1240 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1241 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1242 PetscFunctionReturn(0); 1243 } 1244 1245 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1246 { 1247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1248 PetscErrorCode ierr; 1249 1250 PetscFunctionBegin; 1251 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1252 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1253 PetscFunctionReturn(0); 1254 } 1255 1256 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1257 { 1258 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1259 PetscErrorCode ierr; 1260 1261 PetscFunctionBegin; 1262 #if defined(PETSC_USE_LOG) 1263 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1264 #endif 1265 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1266 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1268 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1269 #if defined(PETSC_USE_CTABLE) 1270 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1271 #else 1272 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1273 #endif 1274 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1275 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1276 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1277 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1278 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1279 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1280 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1281 1282 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1283 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1284 1285 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1295 #if defined(PETSC_HAVE_ELEMENTAL) 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1297 #endif 1298 #if defined(PETSC_HAVE_HYPRE) 1299 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1301 #endif 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1305 PetscFunctionReturn(0); 1306 } 1307 1308 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1309 { 1310 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1311 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1312 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1313 const PetscInt *garray = aij->garray; 1314 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1315 PetscInt *rowlens; 1316 PetscInt *colidxs; 1317 PetscScalar *matvals; 1318 PetscErrorCode ierr; 1319 
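  /* Binary layout produced below: a four-entry header (MAT_FILE_CLASSID, M, N, global number of
     nonzeros), then the nonzero count of every row, then all global column indices, then all values.
     Within each row the entries are written in ascending global column order: off-diagonal columns to
     the left of the diagonal block first, then the diagonal block, then the remaining off-diagonal
     columns. */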
1320 PetscFunctionBegin; 1321 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1322 1323 M = mat->rmap->N; 1324 N = mat->cmap->N; 1325 m = mat->rmap->n; 1326 rs = mat->rmap->rstart; 1327 cs = mat->cmap->rstart; 1328 nz = A->nz + B->nz; 1329 1330 /* write matrix header */ 1331 header[0] = MAT_FILE_CLASSID; 1332 header[1] = M; header[2] = N; header[3] = nz; 1333 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1335 1336 /* fill in and store row lengths */ 1337 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1338 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1339 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1340 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1341 1342 /* fill in and store column indices */ 1343 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1344 for (cnt=0, i=0; i<m; i++) { 1345 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1346 if (garray[B->j[jb]] > cs) break; 1347 colidxs[cnt++] = garray[B->j[jb]]; 1348 } 1349 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1350 colidxs[cnt++] = A->j[ja] + cs; 1351 for (; jb<B->i[i+1]; jb++) 1352 colidxs[cnt++] = garray[B->j[jb]]; 1353 } 1354 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1355 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1356 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1357 1358 /* fill in and store nonzero values */ 1359 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1360 for (cnt=0, i=0; i<m; i++) { 1361 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1362 if (garray[B->j[jb]] > cs) break; 1363 matvals[cnt++] = B->a[jb]; 1364 } 1365 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1366 matvals[cnt++] = A->a[ja]; 1367 for (; jb<B->i[i+1]; jb++) 1368 matvals[cnt++] = B->a[jb]; 1369 } 1370 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1371 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1372 ierr = PetscFree(matvals);CHKERRQ(ierr); 1373 1374 /* write block size option to the viewer's .info file */ 1375 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1376 PetscFunctionReturn(0); 1377 } 1378 1379 #include <petscdraw.h> 1380 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1381 { 1382 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1383 PetscErrorCode ierr; 1384 PetscMPIInt rank = aij->rank,size = aij->size; 1385 PetscBool isdraw,iascii,isbinary; 1386 PetscViewer sviewer; 1387 PetscViewerFormat format; 1388 1389 PetscFunctionBegin; 1390 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1393 if (iascii) { 1394 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1395 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1396 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1397 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1398 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1399 for (i=0; i<(PetscInt)size; i++) { 1400 nmax = 
PetscMax(nmax,nz[i]); 1401 nmin = PetscMin(nmin,nz[i]); 1402 navg += nz[i]; 1403 } 1404 ierr = PetscFree(nz);CHKERRQ(ierr); 1405 navg = navg/size; 1406 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1407 PetscFunctionReturn(0); 1408 } 1409 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1410 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1411 MatInfo info; 1412 PetscBool inodes; 1413 1414 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1415 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1416 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1417 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1418 if (!inodes) { 1419 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1420 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1421 } else { 1422 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1423 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1424 } 1425 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1426 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1427 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1431 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1432 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1433 PetscFunctionReturn(0); 1434 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1435 PetscInt inodecount,inodelimit,*inodes; 1436 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1437 if (inodes) { 1438 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1439 } else { 1440 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1441 } 1442 PetscFunctionReturn(0); 1443 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1444 PetscFunctionReturn(0); 1445 } 1446 } else if (isbinary) { 1447 if (size == 1) { 1448 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1449 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1450 } else { 1451 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1452 } 1453 PetscFunctionReturn(0); 1454 } else if (iascii && size == 1) { 1455 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1456 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1457 PetscFunctionReturn(0); 1458 } else if (isdraw) { 1459 PetscDraw draw; 1460 PetscBool isnull; 1461 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1462 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1463 if (isnull) PetscFunctionReturn(0); 1464 } 1465 1466 { /* assemble the entire matrix onto first processor */ 1467 Mat A = NULL, Av; 1468 IS isrow,iscol; 1469 1470 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1471 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1472 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1473 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1474 /* The commented code uses MatCreateSubMatrices instead */ 1475 /* 1476 Mat *AA, A = NULL, Av; 1477 IS isrow,iscol; 1478 1479 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1480 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1481 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1482 if (!rank) { 1483 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1484 A = AA[0]; 1485 Av = AA[0]; 1486 } 1487 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1488 */ 1489 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1490 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1491 /* 1492 Everyone has to call to draw the matrix since the graphics waits are 1493 synchronized across all processors that share the PetscDraw object 1494 */ 1495 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1496 if (!rank) { 1497 if (((PetscObject)mat)->name) { 1498 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1499 } 1500 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1501 } 1502 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1503 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1504 ierr = MatDestroy(&A);CHKERRQ(ierr); 1505 } 1506 PetscFunctionReturn(0); 1507 } 1508 1509 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1510 { 1511 PetscErrorCode ierr; 1512 PetscBool iascii,isdraw,issocket,isbinary; 1513 1514 PetscFunctionBegin; 1515 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1519 if (iascii || isdraw || isbinary || issocket) { 1520 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1521 } 1522 PetscFunctionReturn(0); 1523 } 1524 1525 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1526 { 1527 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1528 PetscErrorCode ierr; 1529 Vec bb1 = 0; 1530 PetscBool hasop; 1531 1532 PetscFunctionBegin; 1533 if (flag == SOR_APPLY_UPPER) { 1534 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1535 PetscFunctionReturn(0); 1536 } 1537 1538 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1539 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1540 } 1541 1542 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1543 if (flag & SOR_ZERO_INITIAL_GUESS) { 1544 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1545 its--; 1546 } 1547 1548 while (its--) { 1549 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1550 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 1552 /* update rhs: bb1 = bb - B*x */ 1553 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1554 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1555 1556 /* local sweep */ 1557 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1558 } 1559 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1560 if (flag & SOR_ZERO_INITIAL_GUESS) { 1561 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1562 its--; 1563 } 1564 while (its--) { 1565 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 1568 /* update rhs: bb1 = bb - B*x */ 1569 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1570 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1571 1572 /* local sweep */ 1573 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1574 } 1575 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1576 if (flag & SOR_ZERO_INITIAL_GUESS) { 1577 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1578 its--; 1579 } 1580 while (its--) { 1581 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 1584 /* update rhs: bb1 = bb - B*x */ 1585 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1586 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1587 1588 /* local sweep */ 1589 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1590 } 1591 } else if (flag & SOR_EISENSTAT) { 1592 Vec xx1; 1593 1594 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1595 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1596 1597 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 if (!mat->diag) { 1600 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1601 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1602 } 1603 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1604 if (hasop) { 1605 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1606 } else { 1607 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1608 } 1609 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1610 1611 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1612 1613 /* local sweep */ 1614 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1615 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1616 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1617 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1618 1619 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1620 1621 matin->factorerrortype = mat->A->factorerrortype; 1622 PetscFunctionReturn(0); 1623 } 1624 1625 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1626 { 1627 Mat aA,aB,Aperm; 1628 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1629 PetscScalar *aa,*ba; 1630 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1631 PetscSF rowsf,sf; 1632 IS parcolp = NULL; 1633 PetscBool done; 1634 PetscErrorCode ierr; 
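  /* Outline of what follows: star forests built from the row and column permutations determine the new
     global locations of the locally owned rows, the locally owned columns, and the off-diagonal column
     map garray; from those destinations the per-row diagonal and off-diagonal nonzero counts are
     communicated for preallocation, and the permuted matrix Aperm is then filled with MatSetValues(). */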
1635 1636 PetscFunctionBegin; 1637 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1638 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1639 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1640 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1641 1642 /* Invert row permutation to find out where my rows should go */ 1643 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1644 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1645 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1646 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1647 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1648 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1649 1650 /* Invert column permutation to find out where my columns should go */ 1651 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1652 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1653 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1654 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1655 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1656 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1657 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1658 1659 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1660 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1661 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1662 1663 /* Find out where my gcols should go */ 1664 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1665 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1666 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1667 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1668 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1669 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1670 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1672 1673 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1674 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1675 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1676 for (i=0; i<m; i++) { 1677 PetscInt row = rdest[i]; 1678 PetscMPIInt rowner; 1679 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1680 for (j=ai[i]; j<ai[i+1]; j++) { 1681 PetscInt col = cdest[aj[j]]; 1682 PetscMPIInt cowner; 1683 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1684 if (rowner == cowner) dnnz[i]++; 1685 else onnz[i]++; 1686 } 1687 for (j=bi[i]; j<bi[i+1]; j++) { 1688 PetscInt col = gcdest[bj[j]]; 1689 PetscMPIInt cowner; 1690 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1691 if (rowner == cowner) dnnz[i]++; 1692 else onnz[i]++; 1693 } 1694 } 1695 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1696 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1699 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1700 1701 ierr = 
MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1702 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1703 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1704 for (i=0; i<m; i++) { 1705 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1706 PetscInt j0,rowlen; 1707 rowlen = ai[i+1] - ai[i]; 1708 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1709 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1710 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1711 } 1712 rowlen = bi[i+1] - bi[i]; 1713 for (j0=j=0; j<rowlen; j0=j) { 1714 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1715 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1716 } 1717 } 1718 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1719 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1721 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1722 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1723 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1724 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1725 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1726 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1727 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1728 *B = Aperm; 1729 PetscFunctionReturn(0); 1730 } 1731 1732 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1733 { 1734 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1735 PetscErrorCode ierr; 1736 1737 PetscFunctionBegin; 1738 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1739 if (ghosts) *ghosts = aij->garray; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1744 { 1745 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1746 Mat A = mat->A,B = mat->B; 1747 PetscErrorCode ierr; 1748 PetscLogDouble isend[5],irecv[5]; 1749 1750 PetscFunctionBegin; 1751 info->block_size = 1.0; 1752 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1753 1754 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1755 isend[3] = info->memory; isend[4] = info->mallocs; 1756 1757 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1758 1759 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1760 isend[3] += info->memory; isend[4] += info->mallocs; 1761 if (flag == MAT_LOCAL) { 1762 info->nz_used = isend[0]; 1763 info->nz_allocated = isend[1]; 1764 info->nz_unneeded = isend[2]; 1765 info->memory = isend[3]; 1766 info->mallocs = isend[4]; 1767 } else if (flag == MAT_GLOBAL_MAX) { 1768 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1769 1770 info->nz_used = irecv[0]; 1771 info->nz_allocated = irecv[1]; 1772 info->nz_unneeded = irecv[2]; 1773 info->memory = irecv[3]; 1774 info->mallocs = irecv[4]; 1775 } else if (flag == MAT_GLOBAL_SUM) { 1776 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1777 1778 info->nz_used = irecv[0]; 1779 info->nz_allocated = irecv[1]; 1780 info->nz_unneeded = irecv[2]; 1781 info->memory = irecv[3]; 1782 
info->mallocs = irecv[4]; 1783 } 1784 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1785 info->fill_ratio_needed = 0; 1786 info->factor_mallocs = 0; 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1791 { 1792 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1793 PetscErrorCode ierr; 1794 1795 PetscFunctionBegin; 1796 switch (op) { 1797 case MAT_NEW_NONZERO_LOCATIONS: 1798 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1799 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1800 case MAT_KEEP_NONZERO_PATTERN: 1801 case MAT_NEW_NONZERO_LOCATION_ERR: 1802 case MAT_USE_INODES: 1803 case MAT_IGNORE_ZERO_ENTRIES: 1804 MatCheckPreallocated(A,1); 1805 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1806 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1807 break; 1808 case MAT_ROW_ORIENTED: 1809 MatCheckPreallocated(A,1); 1810 a->roworiented = flg; 1811 1812 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1813 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1814 break; 1815 case MAT_NEW_DIAGONALS: 1816 case MAT_SORTED_FULL: 1817 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1818 break; 1819 case MAT_IGNORE_OFF_PROC_ENTRIES: 1820 a->donotstash = flg; 1821 break; 1822 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1823 case MAT_SPD: 1824 case MAT_SYMMETRIC: 1825 case MAT_STRUCTURALLY_SYMMETRIC: 1826 case MAT_HERMITIAN: 1827 case MAT_SYMMETRY_ETERNAL: 1828 break; 1829 case MAT_SUBMAT_SINGLEIS: 1830 A->submat_singleis = flg; 1831 break; 1832 case MAT_STRUCTURE_ONLY: 1833 /* The option is handled directly by MatSetOption() */ 1834 break; 1835 default: 1836 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1837 } 1838 PetscFunctionReturn(0); 1839 } 1840 1841 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1842 { 1843 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1844 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1845 PetscErrorCode ierr; 1846 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1847 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1848 PetscInt *cmap,*idx_p; 1849 1850 PetscFunctionBegin; 1851 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1852 mat->getrowactive = PETSC_TRUE; 1853 1854 if (!mat->rowvalues && (idx || v)) { 1855 /* 1856 allocate enough space to hold information from the longest row. 
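     The buffers are sized from the longest combined row of the diagonal and off-diagonal blocks
     and are reused by every subsequent MatGetRow() call on this matrix.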
1857 */ 1858 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1859 PetscInt max = 1,tmp; 1860 for (i=0; i<matin->rmap->n; i++) { 1861 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1862 if (max < tmp) max = tmp; 1863 } 1864 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1865 } 1866 1867 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1868 lrow = row - rstart; 1869 1870 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1871 if (!v) {pvA = 0; pvB = 0;} 1872 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1873 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1874 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1875 nztot = nzA + nzB; 1876 1877 cmap = mat->garray; 1878 if (v || idx) { 1879 if (nztot) { 1880 /* Sort by increasing column numbers, assuming A and B already sorted */ 1881 PetscInt imark = -1; 1882 if (v) { 1883 *v = v_p = mat->rowvalues; 1884 for (i=0; i<nzB; i++) { 1885 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1886 else break; 1887 } 1888 imark = i; 1889 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1890 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1891 } 1892 if (idx) { 1893 *idx = idx_p = mat->rowindices; 1894 if (imark > -1) { 1895 for (i=0; i<imark; i++) { 1896 idx_p[i] = cmap[cworkB[i]]; 1897 } 1898 } else { 1899 for (i=0; i<nzB; i++) { 1900 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1901 else break; 1902 } 1903 imark = i; 1904 } 1905 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1906 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1907 } 1908 } else { 1909 if (idx) *idx = 0; 1910 if (v) *v = 0; 1911 } 1912 } 1913 *nz = nztot; 1914 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1915 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1916 PetscFunctionReturn(0); 1917 } 1918 1919 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1920 { 1921 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1922 1923 PetscFunctionBegin; 1924 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1925 aij->getrowactive = PETSC_FALSE; 1926 PetscFunctionReturn(0); 1927 } 1928 1929 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1930 { 1931 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1932 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1933 PetscErrorCode ierr; 1934 PetscInt i,j,cstart = mat->cmap->rstart; 1935 PetscReal sum = 0.0; 1936 MatScalar *v; 1937 1938 PetscFunctionBegin; 1939 if (aij->size == 1) { 1940 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1941 } else { 1942 if (type == NORM_FROBENIUS) { 1943 v = amat->a; 1944 for (i=0; i<amat->nz; i++) { 1945 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1946 } 1947 v = bmat->a; 1948 for (i=0; i<bmat->nz; i++) { 1949 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1950 } 1951 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1952 *norm = PetscSqrtReal(*norm); 1953 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1954 } else if (type == NORM_1) { /* max column norm */ 1955 PetscReal *tmp,*tmp2; 1956 PetscInt *jj,*garray = aij->garray; 1957 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1958 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1959 *norm = 0.0; 
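      /* accumulate local column sums: diagonal-block entries are offset by cstart and off-diagonal
         entries are mapped through garray; the sums are then reduced over all ranks and the 1-norm
         is the largest resulting column sum */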
1960 v = amat->a; jj = amat->j; 1961 for (j=0; j<amat->nz; j++) { 1962 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1963 } 1964 v = bmat->a; jj = bmat->j; 1965 for (j=0; j<bmat->nz; j++) { 1966 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1967 } 1968 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1969 for (j=0; j<mat->cmap->N; j++) { 1970 if (tmp2[j] > *norm) *norm = tmp2[j]; 1971 } 1972 ierr = PetscFree(tmp);CHKERRQ(ierr); 1973 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1974 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1975 } else if (type == NORM_INFINITY) { /* max row norm */ 1976 PetscReal ntemp = 0.0; 1977 for (j=0; j<aij->A->rmap->n; j++) { 1978 v = amat->a + amat->i[j]; 1979 sum = 0.0; 1980 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1981 sum += PetscAbsScalar(*v); v++; 1982 } 1983 v = bmat->a + bmat->i[j]; 1984 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1985 sum += PetscAbsScalar(*v); v++; 1986 } 1987 if (sum > ntemp) ntemp = sum; 1988 } 1989 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1990 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1991 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1992 } 1993 PetscFunctionReturn(0); 1994 } 1995 1996 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1997 { 1998 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 1999 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2000 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2001 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2002 PetscErrorCode ierr; 2003 Mat B,A_diag,*B_diag; 2004 const MatScalar *array; 2005 2006 PetscFunctionBegin; 2007 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2008 ai = Aloc->i; aj = Aloc->j; 2009 bi = Bloc->i; bj = Bloc->j; 2010 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2011 PetscInt *d_nnz,*g_nnz,*o_nnz; 2012 PetscSFNode *oloc; 2013 PETSC_UNUSED PetscSF sf; 2014 2015 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2016 /* compute d_nnz for preallocation */ 2017 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2018 for (i=0; i<ai[ma]; i++) { 2019 d_nnz[aj[i]]++; 2020 } 2021 /* compute local off-diagonal contributions */ 2022 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2023 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2024 /* map those to global */ 2025 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2026 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2027 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2028 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2029 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2030 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2031 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2032 2033 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2034 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2035 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2036 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2037 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2038 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2039 } else { 2040 B = *matout; 2041 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2042 } 2043 2044 b = (Mat_MPIAIJ*)B->data; 2045 A_diag = a->A; 2046 B_diag = &b->A; 2047 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2048 A_diag_ncol = A_diag->cmap->N; 2049 B_diag_ilen = sub_B_diag->ilen; 2050 B_diag_i = sub_B_diag->i; 2051 2052 /* Set ilen for diagonal of B */ 2053 for (i=0; i<A_diag_ncol; i++) { 2054 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2055 } 2056 2057 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2058 very quickly (=without using MatSetValues), because all writes are local. */ 2059 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2060 2061 /* copy over the B part */ 2062 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2063 array = Bloc->a; 2064 row = A->rmap->rstart; 2065 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2066 cols_tmp = cols; 2067 for (i=0; i<mb; i++) { 2068 ncol = bi[i+1]-bi[i]; 2069 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2070 row++; 2071 array += ncol; cols_tmp += ncol; 2072 } 2073 ierr = PetscFree(cols);CHKERRQ(ierr); 2074 2075 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2076 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2078 *matout = B; 2079 } else { 2080 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2081 } 2082 PetscFunctionReturn(0); 2083 } 2084 2085 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2086 { 2087 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2088 Mat a = aij->A,b = aij->B; 2089 PetscErrorCode ierr; 2090 PetscInt s1,s2,s3; 2091 2092 PetscFunctionBegin; 2093 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2094 if (rr) { 2095 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2096 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2097 /* Overlap communication with computation. 
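     The scatter of rr into lvec is only started here; it is completed after the diagonal block
     has been scaled, so the communication overlaps that local work.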
*/ 2098 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2099 } 2100 if (ll) { 2101 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2102 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2103 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2104 } 2105 /* scale the diagonal block */ 2106 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2107 2108 if (rr) { 2109 /* Do a scatter end and then right scale the off-diagonal block */ 2110 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2111 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2112 } 2113 PetscFunctionReturn(0); 2114 } 2115 2116 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2117 { 2118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2127 { 2128 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2129 Mat a,b,c,d; 2130 PetscBool flg; 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 a = matA->A; b = matA->B; 2135 c = matB->A; d = matB->B; 2136 2137 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2138 if (flg) { 2139 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2140 } 2141 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2146 { 2147 PetscErrorCode ierr; 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2149 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2150 2151 PetscFunctionBegin; 2152 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2153 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2154 /* because of the column compression in the off-processor part of the matrix a->B, 2155 the number of columns in a->B and b->B may be different, hence we cannot call 2156 the MatCopy() directly on the two parts. If need be, we can provide a more 2157 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2158 then copying the submatrices */ 2159 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2160 } else { 2161 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2162 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2163 } 2164 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2169 { 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2174 PetscFunctionReturn(0); 2175 } 2176 2177 /* 2178 Computes the number of nonzeros per row needed for preallocation when X and Y 2179 have different nonzero structure. 
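   The count for a row is the size of the union of the (globally numbered) column indices of X and Y;
   for example, a row with columns {0,3} in X and {1,3} in Y yields nnz = 3.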
2180 */ 2181 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2182 { 2183 PetscInt i,j,k,nzx,nzy; 2184 2185 PetscFunctionBegin; 2186 /* Set the number of nonzeros in the new matrix */ 2187 for (i=0; i<m; i++) { 2188 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2189 nzx = xi[i+1] - xi[i]; 2190 nzy = yi[i+1] - yi[i]; 2191 nnz[i] = 0; 2192 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2193 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2194 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2195 nnz[i]++; 2196 } 2197 for (; k<nzy; k++) nnz[i]++; 2198 } 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2203 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2204 { 2205 PetscErrorCode ierr; 2206 PetscInt m = Y->rmap->N; 2207 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2208 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2209 2210 PetscFunctionBegin; 2211 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2212 PetscFunctionReturn(0); 2213 } 2214 2215 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2216 { 2217 PetscErrorCode ierr; 2218 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2219 PetscBLASInt bnz,one=1; 2220 Mat_SeqAIJ *x,*y; 2221 2222 PetscFunctionBegin; 2223 if (str == SAME_NONZERO_PATTERN) { 2224 PetscScalar alpha = a; 2225 x = (Mat_SeqAIJ*)xx->A->data; 2226 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2227 y = (Mat_SeqAIJ*)yy->A->data; 2228 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2229 x = (Mat_SeqAIJ*)xx->B->data; 2230 y = (Mat_SeqAIJ*)yy->B->data; 2231 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2232 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2233 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2234 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2235 will be updated */ 2236 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2237 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2238 Y->offloadmask = PETSC_OFFLOAD_CPU; 2239 } 2240 #endif 2241 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2242 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2243 } else { 2244 Mat B; 2245 PetscInt *nnz_d,*nnz_o; 2246 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2247 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2248 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2249 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2250 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2251 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2252 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2253 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2255 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2256 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2257 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2258 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
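      /* MatHeaderReplace() above has swapped the assembled sum B into Y, so Y now carries the union
         nonzero pattern; only the scratch preallocation arrays remain to be freed */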
2259 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2260 } 2261 PetscFunctionReturn(0); 2262 } 2263 2264 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2265 2266 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2267 { 2268 #if defined(PETSC_USE_COMPLEX) 2269 PetscErrorCode ierr; 2270 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2271 2272 PetscFunctionBegin; 2273 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2274 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2275 #else 2276 PetscFunctionBegin; 2277 #endif 2278 PetscFunctionReturn(0); 2279 } 2280 2281 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2282 { 2283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2284 PetscErrorCode ierr; 2285 2286 PetscFunctionBegin; 2287 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2288 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2289 PetscFunctionReturn(0); 2290 } 2291 2292 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2293 { 2294 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2295 PetscErrorCode ierr; 2296 2297 PetscFunctionBegin; 2298 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2299 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2304 { 2305 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2306 PetscErrorCode ierr; 2307 PetscInt i,*idxb = 0; 2308 PetscScalar *va,*vb; 2309 Vec vtmp; 2310 2311 PetscFunctionBegin; 2312 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2313 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2314 if (idx) { 2315 for (i=0; i<A->rmap->n; i++) { 2316 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2317 } 2318 } 2319 2320 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2321 if (idx) { 2322 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2323 } 2324 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2325 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2326 2327 for (i=0; i<A->rmap->n; i++) { 2328 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2329 va[i] = vb[i]; 2330 if (idx) idx[i] = a->garray[idxb[i]]; 2331 } 2332 } 2333 2334 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2335 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2336 ierr = PetscFree(idxb);CHKERRQ(ierr); 2337 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2338 PetscFunctionReturn(0); 2339 } 2340 2341 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2342 { 2343 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2344 PetscErrorCode ierr; 2345 PetscInt i,*idxb = 0; 2346 PetscScalar *va,*vb; 2347 Vec vtmp; 2348 2349 PetscFunctionBegin; 2350 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2351 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2352 if (idx) { 2353 for (i=0; i<A->rmap->n; i++) { /* v has one entry per local row, so loop over the local row count */ 2354 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2355 } 2356 } 2357 2358 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2359 if (idx) { 2360 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2361 } 2362 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2363 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2364 2365 for (i=0; i<A->rmap->n; i++) { 2366 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2367 va[i] = vb[i]; 2368 if (idx) idx[i] = a->garray[idxb[i]]; 2369 } 2370 } 2371 2372 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2373 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2374 ierr = PetscFree(idxb);CHKERRQ(ierr); 2375 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2376 PetscFunctionReturn(0); 2377 } 2378 2379 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2380 { 2381 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data; 2382 PetscInt n = A->rmap->n; 2383 PetscInt cstart = A->cmap->rstart; 2384 PetscInt *cmap = mat->garray; 2385 PetscInt *diagIdx, *offdiagIdx; 2386 Vec diagV, offdiagV; 2387 PetscScalar *a, *diagA, *offdiagA; 2388 PetscInt r; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2393 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* work vectors are purely local, as in MatGetRowMax_MPIAIJ() below */ 2394 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2395 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2396 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2397 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2398 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2399 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2400 for (r = 0; r < n; ++r) { 2401 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2402 a[r] = diagA[r]; 2403 idx[r] = cstart + diagIdx[r]; 2404 } else { 2405 a[r] = offdiagA[r]; 2406 idx[r] = cmap[offdiagIdx[r]]; 2407 } 2408 } 2409 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2411 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2412 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2413 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2414 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2415 PetscFunctionReturn(0); 2416 } 2417 2418 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2419 { 2420 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2421 PetscInt n = A->rmap->n; 2422 PetscInt cstart = A->cmap->rstart; 2423 PetscInt *cmap = mat->garray; 2424 PetscInt *diagIdx, *offdiagIdx; 2425 Vec diagV, offdiagV; 2426 PetscScalar *a, *diagA, *offdiagA; 2427 PetscInt r; 2428 PetscErrorCode ierr; 2429 2430 PetscFunctionBegin; 2431 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2432 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2433 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2434 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2435 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2436 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2437 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2438 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2439 for (r = 0; r < n; ++r) { 2440 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2441 a[r] = diagA[r]; 2442 idx[r] = cstart + diagIdx[r]; 2443 } else { 2444 a[r] = offdiagA[r]; 2445 idx[r] = cmap[offdiagIdx[r]]; 2446 } 2447 } 2448 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2449 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2450 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2451 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2452 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2453 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2458 { 2459 PetscErrorCode ierr; 2460 Mat *dummy; 2461 2462 PetscFunctionBegin; 2463 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2464 *newmat = *dummy; 2465 ierr = PetscFree(dummy);CHKERRQ(ierr); 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2470 { 2471 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2472 PetscErrorCode ierr; 2473 2474 PetscFunctionBegin; 2475 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2476 A->factorerrortype = a->A->factorerrortype; 2477 PetscFunctionReturn(0); 2478 } 2479 2480 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2481 { 2482 PetscErrorCode ierr; 2483 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2484 2485 PetscFunctionBegin; 2486 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2487 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2488 if (x->assembled) { 2489 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2490 } else { 2491 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2492 } 2493 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2494 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2495 PetscFunctionReturn(0); 2496 } 2497 2498 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2499 { 2500 PetscFunctionBegin; 2501 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2502 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2503 PetscFunctionReturn(0); 2504 } 2505 2506 /*@ 2507 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2508 2509 Collective on Mat 2510 2511 Input Parameters: 2512 + A - the matrix 2513 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2514 2515 Level: advanced 2516 2517 @*/ 2518 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2519 { 2520 PetscErrorCode ierr; 2521 2522 PetscFunctionBegin; 2523 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2524 PetscFunctionReturn(0); 2525 } 2526 2527 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2528 { 2529 PetscErrorCode ierr; 2530 PetscBool sc = PETSC_FALSE,flg; 2531 2532 PetscFunctionBegin; 2533 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2534 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2535 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2536 if (flg) { 2537 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2538 } 2539 ierr = PetscOptionsTail();CHKERRQ(ierr); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2544 { 2545 PetscErrorCode ierr; 2546 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2547 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2548 2549 PetscFunctionBegin; 2550 if (!Y->preallocated) { 2551 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2552 } else if (!aij->nz) { 2553 PetscInt nonew = aij->nonew; 2554 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2555 aij->nonew = nonew; 2556 } 2557 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2558 PetscFunctionReturn(0); 2559 } 2560 2561 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2562 { 2563 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2564 PetscErrorCode ierr; 2565 2566 PetscFunctionBegin; 2567 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2568 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2569 if (d) { 2570 PetscInt rstart; 
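    /* the sequential routine reports a local row index; shift it by the global row start so that *d is a global index */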
2571 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2572 *d += rstart; 2573 2574 } 2575 PetscFunctionReturn(0); 2576 } 2577 2578 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2579 { 2580 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2581 PetscErrorCode ierr; 2582 2583 PetscFunctionBegin; 2584 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2585 PetscFunctionReturn(0); 2586 } 2587 2588 /* -------------------------------------------------------------------*/ 2589 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2590 MatGetRow_MPIAIJ, 2591 MatRestoreRow_MPIAIJ, 2592 MatMult_MPIAIJ, 2593 /* 4*/ MatMultAdd_MPIAIJ, 2594 MatMultTranspose_MPIAIJ, 2595 MatMultTransposeAdd_MPIAIJ, 2596 0, 2597 0, 2598 0, 2599 /*10*/ 0, 2600 0, 2601 0, 2602 MatSOR_MPIAIJ, 2603 MatTranspose_MPIAIJ, 2604 /*15*/ MatGetInfo_MPIAIJ, 2605 MatEqual_MPIAIJ, 2606 MatGetDiagonal_MPIAIJ, 2607 MatDiagonalScale_MPIAIJ, 2608 MatNorm_MPIAIJ, 2609 /*20*/ MatAssemblyBegin_MPIAIJ, 2610 MatAssemblyEnd_MPIAIJ, 2611 MatSetOption_MPIAIJ, 2612 MatZeroEntries_MPIAIJ, 2613 /*24*/ MatZeroRows_MPIAIJ, 2614 0, 2615 0, 2616 0, 2617 0, 2618 /*29*/ MatSetUp_MPIAIJ, 2619 0, 2620 0, 2621 MatGetDiagonalBlock_MPIAIJ, 2622 0, 2623 /*34*/ MatDuplicate_MPIAIJ, 2624 0, 2625 0, 2626 0, 2627 0, 2628 /*39*/ MatAXPY_MPIAIJ, 2629 MatCreateSubMatrices_MPIAIJ, 2630 MatIncreaseOverlap_MPIAIJ, 2631 MatGetValues_MPIAIJ, 2632 MatCopy_MPIAIJ, 2633 /*44*/ MatGetRowMax_MPIAIJ, 2634 MatScale_MPIAIJ, 2635 MatShift_MPIAIJ, 2636 MatDiagonalSet_MPIAIJ, 2637 MatZeroRowsColumns_MPIAIJ, 2638 /*49*/ MatSetRandom_MPIAIJ, 2639 0, 2640 0, 2641 0, 2642 0, 2643 /*54*/ MatFDColoringCreate_MPIXAIJ, 2644 0, 2645 MatSetUnfactored_MPIAIJ, 2646 MatPermute_MPIAIJ, 2647 0, 2648 /*59*/ MatCreateSubMatrix_MPIAIJ, 2649 MatDestroy_MPIAIJ, 2650 MatView_MPIAIJ, 2651 0, 2652 0, 2653 /*64*/ 0, 2654 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2655 0, 2656 0, 2657 0, 2658 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2659 MatGetRowMinAbs_MPIAIJ, 2660 0, 2661 0, 2662 0, 2663 0, 2664 /*75*/ MatFDColoringApply_AIJ, 2665 MatSetFromOptions_MPIAIJ, 2666 0, 2667 0, 2668 MatFindZeroDiagonals_MPIAIJ, 2669 /*80*/ 0, 2670 0, 2671 0, 2672 /*83*/ MatLoad_MPIAIJ, 2673 MatIsSymmetric_MPIAIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /*89*/ 0, 2679 0, 2680 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2681 0, 2682 0, 2683 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2684 0, 2685 0, 2686 0, 2687 MatBindToCPU_MPIAIJ, 2688 /*99*/ MatProductSetFromOptions_MPIAIJ, 2689 0, 2690 0, 2691 MatConjugate_MPIAIJ, 2692 0, 2693 /*104*/MatSetValuesRow_MPIAIJ, 2694 MatRealPart_MPIAIJ, 2695 MatImaginaryPart_MPIAIJ, 2696 0, 2697 0, 2698 /*109*/0, 2699 0, 2700 MatGetRowMin_MPIAIJ, 2701 0, 2702 MatMissingDiagonal_MPIAIJ, 2703 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2704 0, 2705 MatGetGhosts_MPIAIJ, 2706 0, 2707 0, 2708 /*119*/0, 2709 0, 2710 0, 2711 0, 2712 MatGetMultiProcBlock_MPIAIJ, 2713 /*124*/MatFindNonzeroRows_MPIAIJ, 2714 MatGetColumnNorms_MPIAIJ, 2715 MatInvertBlockDiagonal_MPIAIJ, 2716 MatInvertVariableBlockDiagonal_MPIAIJ, 2717 MatCreateSubMatricesMPI_MPIAIJ, 2718 /*129*/0, 2719 0, 2720 0, 2721 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2722 0, 2723 /*134*/0, 2724 0, 2725 0, 2726 0, 2727 0, 2728 /*139*/MatSetBlockSizes_MPIAIJ, 2729 0, 2730 0, 2731 MatFDColoringSetUp_MPIXAIJ, 2732 MatFindOffBlockDiagonalEntries_MPIAIJ, 2733 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2734 /*145*/0, 2735 0, 2736 0 2737 }; 2738 2739 /* 
----------------------------------------------------------------------------------------*/ 2740 2741 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2742 { 2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2753 { 2754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2759 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2764 { 2765 Mat_MPIAIJ *b; 2766 PetscErrorCode ierr; 2767 PetscMPIInt size; 2768 2769 PetscFunctionBegin; 2770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2772 b = (Mat_MPIAIJ*)B->data; 2773 2774 #if defined(PETSC_USE_CTABLE) 2775 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2778 #endif 2779 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2780 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2781 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2782 2783 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2784 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2785 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2791 2792 if (!B->preallocated) { 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2796 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2798 } 2799 2800 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2801 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2802 B->preallocated = PETSC_TRUE; 2803 B->was_assembled = PETSC_FALSE; 2804 B->assembled = PETSC_FALSE; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2809 { 2810 Mat_MPIAIJ *b; 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 b = (Mat_MPIAIJ*)B->data; 2818 2819 #if defined(PETSC_USE_CTABLE) 2820 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2821 #else 2822 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2823 #endif 2824 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2825 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2826 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2827 2828 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2829 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2830 B->preallocated = PETSC_TRUE; 2831 B->was_assembled = PETSC_FALSE; 2832 B->assembled = PETSC_FALSE; 2833 PetscFunctionReturn(0); 2834 } 2835 2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2837 { 2838 Mat mat; 2839 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 *newmat = 0; 2844 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2845 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2846 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2847 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2848 a = (Mat_MPIAIJ*)mat->data; 2849 2850 mat->factortype = matin->factortype; 2851 mat->assembled = matin->assembled; 2852 mat->insertmode = NOT_SET_VALUES; 2853 mat->preallocated = matin->preallocated; 2854 2855 a->size = oldmat->size; 2856 a->rank = oldmat->rank; 2857 a->donotstash = oldmat->donotstash; 2858 a->roworiented = oldmat->roworiented; 2859 a->rowindices = NULL; 2860 a->rowvalues = NULL; 2861 a->getrowactive = PETSC_FALSE; 2862 2863 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2864 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2865 2866 if (oldmat->colmap) { 2867 #if defined(PETSC_USE_CTABLE) 2868 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2869 #else 2870 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2871 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2872 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2873 #endif 2874 } else a->colmap = NULL; 2875 if (oldmat->garray) { 2876 PetscInt len; 2877 len = oldmat->B->cmap->n; 2878 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2879 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2880 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2881 } else a->garray = NULL; 2882 2883 /* It may happen MatDuplicate is called with a non-assembled matrix 2884 In fact, MatDuplicate only requires the matrix to be preallocated 2885 This may happen inside a DMCreateMatrix_Shell */ 2886 if (oldmat->lvec) { 2887 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2888 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2889 } 2890 if (oldmat->Mvctx) { 2891 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2892 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2893 } 2894 if (oldmat->Mvctx_mpi1) { 2895 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2896 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2897 } 2898 2899 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2901 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2902 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2903 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2904 *newmat = mat; 2905 PetscFunctionReturn(0); 2906 } 2907 2908 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2909 { 2910 PetscBool isbinary, ishdf5; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2915 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2916 /* force binary viewer to load .info file if it has not yet done so */ 2917 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2918 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2919 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2920 if (isbinary) { 2921 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2922 } else if (ishdf5) { 2923 #if defined(PETSC_HAVE_HDF5) 2924 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2925 #else 2926 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2927 #endif 2928 } else { 2929 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2935 { 2936 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2937 PetscInt *rowidxs,*colidxs; 2938 PetscScalar *matvals; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2943 2944 /* read in matrix header */ 2945 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2946 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2947 M = header[1]; N = header[2]; nz = header[3]; 2948 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2949 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2950 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2951 2952 /* set block sizes from the viewer's .info file */ 2953 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2954 /* set global sizes if not set already */ 2955 if (mat->rmap->N < 0) mat->rmap->N = M; 2956 if (mat->cmap->N < 0) mat->cmap->N = N; 2957 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2958 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2959 2960 /* check if the matrix sizes are correct */ 2961 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2962 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2963 2964 /* read in row lengths and build row indices */ 2965 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2966 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2967 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2968 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2969 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2970 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2971 /* read in column indices and matrix values */ 2972 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2973 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2974 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2975 /* store matrix indices and values */ 2976 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2977 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2978 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2979 PetscFunctionReturn(0); 2980 } 2981 2982 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2983 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2984 { 2985 PetscErrorCode ierr; 2986 IS iscol_local; 2987 PetscBool isstride; 2988 PetscMPIInt lisstride=0,gisstride; 2989 2990 PetscFunctionBegin; 2991 /* check if we are grabbing all columns*/ 2992 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2993 2994 if (isstride) { 2995 PetscInt start,len,mstart,mlen; 2996 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2997 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2998 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2999 if (mstart == start && mlen-mstart == len) lisstride = 1; 3000 } 3001 3002 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3003 if (gisstride) { 3004 PetscInt N; 3005 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3006 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3007 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3008 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3009 } else { 3010 PetscInt cbs; 3011 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3012 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3013 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3014 } 3015 3016 *isseq = iscol_local; 3017 PetscFunctionReturn(0); 3018 } 3019 3020 /* 3021 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3022 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3023 3024 Input Parameters: 3025 mat - matrix 3026 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3027 i.e., mat->rstart <= isrow[i] < mat->rend 3028 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3029 i.e., mat->cstart <= iscol[i] < mat->cend 3030 Output Parameter: 3031 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3032 iscol_o - sequential column index set for retrieving mat->B 3033 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3034 */ 3035 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3036 { 3037 PetscErrorCode ierr; 3038 Vec x,cmap; 3039 const PetscInt *is_idx; 3040 PetscScalar *xarray,*cmaparray; 3041 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3042 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3043 Mat B=a->B; 3044 Vec lvec=a->lvec,lcmap; 3045 PetscInt i,cstart,cend,Bn=B->cmap->N; 3046 MPI_Comm comm; 3047 VecScatter Mvctx=a->Mvctx; 3048 3049 PetscFunctionBegin; 3050 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3051 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3052 3053 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3054 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3055 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3056 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3057 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3058 3059 /* Get start indices */ 3060 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3061 isstart -= ncols; 3062 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3063 3064 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3065 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3066 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3067 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3068 for (i=0; i<ncols; i++) { 3069 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3070 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3071 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3072 } 3073 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3074 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3075 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3076 3077 /* Get iscol_d */ 3078 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3079 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3080 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3081 3082 /* Get isrow_d */ 3083 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3084 rstart = mat->rmap->rstart; 3085 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3086 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3087 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3088 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3089 3090 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3091 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3092 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3093 3094 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3095 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3096 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 3098 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3099 3100 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3101 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 3103 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3104 /* off-process column indices */ 3105 count = 0; 3106 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3107 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3108 3109 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3110 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3111 for (i=0; i<Bn; i++) { 3112 if (PetscRealPart(xarray[i]) > -1.0) { 3113 idx[count] = i; /* local column index in off-diagonal part B */ 3114 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3115 count++; 3116 } 3117 } 3118 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3119 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3120 3121 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3122 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3123 3124 ierr = PetscFree(idx);CHKERRQ(ierr); 3125 *garray = cmap1; 3126 3127 ierr = VecDestroy(&x);CHKERRQ(ierr); 3128 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3129 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3130 PetscFunctionReturn(0); 3131 } 3132 3133 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3134 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3135 { 3136 PetscErrorCode ierr; 3137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3138 Mat M = NULL; 3139 MPI_Comm comm; 3140 IS iscol_d,isrow_d,iscol_o; 3141 Mat Asub = NULL,Bsub = NULL; 3142 PetscInt n; 3143 3144 PetscFunctionBegin; 3145 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3146 3147 if (call == MAT_REUSE_MATRIX) { 3148 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3149 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3150 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3151 3152 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3153 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3154 3155 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3156 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3157 3158 /* Update diagonal and off-diagonal portions of submat */ 3159 asub = (Mat_MPIAIJ*)(*submat)->data; 3160 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3161 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3162 if (n) { 3163 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3164 } 3165 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3166 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 3168 } else { /* call == MAT_INITIAL_MATRIX) */ 3169 const PetscInt *garray; 3170 PetscInt BsubN; 3171 3172 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3173 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3174 3175 /* Create local submatrices Asub and Bsub */ 3176 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3178 3179 /* Create submatrix M */ 3180 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3181 3182 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3183 asub = (Mat_MPIAIJ*)M->data; 3184 3185 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3186 n = asub->B->cmap->N; 3187 if (BsubN > n) { 3188 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3189 const PetscInt *idx; 3190 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3191 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3192 3193 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3194 j = 0; 3195 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3196 for (i=0; i<n; i++) { 3197 if (j >= BsubN) break; 3198 while (subgarray[i] > garray[j]) j++; 3199 3200 if (subgarray[i] == garray[j]) { 3201 idx_new[i] = idx[j++]; 3202 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3203 } 3204 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3205 3206 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3207 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3208 3209 } else if (BsubN < n) { 3210 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3211 } 3212 3213 ierr = PetscFree(garray);CHKERRQ(ierr); 3214 *submat = M; 3215 3216 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3217 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3218 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3219 3220 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3221 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3222 3223 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3224 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3225 } 3226 PetscFunctionReturn(0); 3227 } 3228 3229 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3230 { 3231 PetscErrorCode ierr; 3232 IS iscol_local=NULL,isrow_d; 3233 PetscInt csize; 3234 PetscInt n,i,j,start,end; 3235 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3236 MPI_Comm comm; 3237 3238 PetscFunctionBegin; 3239 /* If isrow has same processor distribution as mat, 3240 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3241 if (call == MAT_REUSE_MATRIX) { 3242 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3243 if (isrow_d) { 3244 sameRowDist = PETSC_TRUE; 3245 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3246 } else { 3247 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3248 if (iscol_local) { 3249 sameRowDist = PETSC_TRUE; 3250 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3251 } 3252 } 3253 } else { 3254 /* Check if isrow has same processor distribution as mat */ 3255 sameDist[0] = 
PETSC_FALSE; 3256 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3257 if (!n) { 3258 sameDist[0] = PETSC_TRUE; 3259 } else { 3260 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3261 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3262 if (i >= start && j < end) { 3263 sameDist[0] = PETSC_TRUE; 3264 } 3265 } 3266 3267 /* Check if iscol has same processor distribution as mat */ 3268 sameDist[1] = PETSC_FALSE; 3269 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3270 if (!n) { 3271 sameDist[1] = PETSC_TRUE; 3272 } else { 3273 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3274 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3275 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3276 } 3277 3278 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3279 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3280 sameRowDist = tsameDist[0]; 3281 } 3282 3283 if (sameRowDist) { 3284 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3285 /* isrow and iscol have same processor distribution as mat */ 3286 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3287 PetscFunctionReturn(0); 3288 } else { /* sameRowDist */ 3289 /* isrow has same processor distribution as mat */ 3290 if (call == MAT_INITIAL_MATRIX) { 3291 PetscBool sorted; 3292 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3293 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3294 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3295 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3296 3297 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3298 if (sorted) { 3299 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3300 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 } else { /* call == MAT_REUSE_MATRIX */ 3304 IS iscol_sub; 3305 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3306 if (iscol_sub) { 3307 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3308 PetscFunctionReturn(0); 3309 } 3310 } 3311 } 3312 } 3313 3314 /* General case: iscol -> iscol_local which has global size of iscol */ 3315 if (call == MAT_REUSE_MATRIX) { 3316 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3317 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3318 } else { 3319 if (!iscol_local) { 3320 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3321 } 3322 } 3323 3324 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3325 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3326 3327 if (call == MAT_INITIAL_MATRIX) { 3328 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3329 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3330 } 3331 PetscFunctionReturn(0); 3332 } 3333 3334 /*@C 3335 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3336 and "off-diagonal" part of the matrix in CSR format. 3337 3338 Collective 3339 3340 Input Parameters: 3341 + comm - MPI communicator 3342 . 
A - "diagonal" portion of matrix 3343 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3344 - garray - global index of B columns 3345 3346 Output Parameter: 3347 . mat - the matrix, with input A as its local diagonal matrix 3348 Level: advanced 3349 3350 Notes: 3351 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3352 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3353 3354 .seealso: MatCreateMPIAIJWithSplitArrays() 3355 @*/ 3356 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3357 { 3358 PetscErrorCode ierr; 3359 Mat_MPIAIJ *maij; 3360 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3361 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3362 PetscScalar *oa=b->a; 3363 Mat Bnew; 3364 PetscInt m,n,N; 3365 3366 PetscFunctionBegin; 3367 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3368 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3369 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3370 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3371 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3372 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3373 3374 /* Get global columns of mat */ 3375 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3376 3377 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3378 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3379 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3380 maij = (Mat_MPIAIJ*)(*mat)->data; 3381 3382 (*mat)->preallocated = PETSC_TRUE; 3383 3384 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3385 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3386 3387 /* Set A as diagonal portion of *mat */ 3388 maij->A = A; 3389 3390 nz = oi[m]; 3391 for (i=0; i<nz; i++) { 3392 col = oj[i]; 3393 oj[i] = garray[col]; 3394 } 3395 3396 /* Set Bnew as off-diagonal portion of *mat */ 3397 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3398 bnew = (Mat_SeqAIJ*)Bnew->data; 3399 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3400 maij->B = Bnew; 3401 3402 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3403 3404 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3405 b->free_a = PETSC_FALSE; 3406 b->free_ij = PETSC_FALSE; 3407 ierr = MatDestroy(&B);CHKERRQ(ierr); 3408 3409 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3410 bnew->free_a = PETSC_TRUE; 3411 bnew->free_ij = PETSC_TRUE; 3412 3413 /* condense columns of maij->B */ 3414 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3415 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3416 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3418 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3419 PetscFunctionReturn(0); 3420 } 3421 3422 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3423 
3424 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3425 { 3426 PetscErrorCode ierr; 3427 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3428 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3429 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3430 Mat M,Msub,B=a->B; 3431 MatScalar *aa; 3432 Mat_SeqAIJ *aij; 3433 PetscInt *garray = a->garray,*colsub,Ncols; 3434 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3435 IS iscol_sub,iscmap; 3436 const PetscInt *is_idx,*cmap; 3437 PetscBool allcolumns=PETSC_FALSE; 3438 MPI_Comm comm; 3439 3440 PetscFunctionBegin; 3441 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3442 3443 if (call == MAT_REUSE_MATRIX) { 3444 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3445 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3446 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3447 3448 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3449 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3450 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3452 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3453 3454 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3455 3456 } else { /* call == MAT_INITIAL_MATRIX) */ 3457 PetscBool flg; 3458 3459 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3460 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3461 3462 /* (1) iscol -> nonscalable iscol_local */ 3463 /* Check for special case: each processor gets entire matrix columns */ 3464 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3465 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3466 if (allcolumns) { 3467 iscol_sub = iscol_local; 3468 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3469 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3470 3471 } else { 3472 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3473 PetscInt *idx,*cmap1,k; 3474 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3475 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3476 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3477 count = 0; 3478 k = 0; 3479 for (i=0; i<Ncols; i++) { 3480 j = is_idx[i]; 3481 if (j >= cstart && j < cend) { 3482 /* diagonal part of mat */ 3483 idx[count] = j; 3484 cmap1[count++] = i; /* column index in submat */ 3485 } else if (Bn) { 3486 /* off-diagonal part of mat */ 3487 if (j == garray[k]) { 3488 idx[count] = j; 3489 cmap1[count++] = i; /* column index in submat */ 3490 } else if (j > garray[k]) { 3491 while (j > garray[k] && k < Bn-1) k++; 3492 if (j == garray[k]) { 3493 idx[count] = j; 3494 cmap1[count++] = i; /* column index in submat */ 3495 } 3496 } 3497 } 3498 } 3499 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3500 3501 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3502 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3503 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3504 3505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3506 } 3507 3508 /* (3) Create sequential Msub */ 3509 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3510 } 3511 3512 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3513 aij = (Mat_SeqAIJ*)(Msub)->data; 3514 ii = aij->i; 3515 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3516 3517 /* 3518 m - number of local rows 3519 Ncols - number of columns (same on all processors) 3520 rstart - first row in new global matrix generated 3521 */ 3522 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3523 3524 if (call == MAT_INITIAL_MATRIX) { 3525 /* (4) Create parallel newmat */ 3526 PetscMPIInt rank,size; 3527 PetscInt csize; 3528 3529 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3530 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3531 3532 /* 3533 Determine the number of non-zeros in the diagonal and off-diagonal 3534 portions of the matrix in order to do correct preallocation 3535 */ 3536 3537 /* first get start and end of "diagonal" columns */ 3538 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3539 if (csize == PETSC_DECIDE) { 3540 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3541 if (mglobal == Ncols) { /* square matrix */ 3542 nlocal = m; 3543 } else { 3544 nlocal = Ncols/size + ((Ncols % size) > rank); 3545 } 3546 } else { 3547 nlocal = csize; 3548 } 3549 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3550 rstart = rend - nlocal; 3551 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3552 3553 /* next, compute all the lengths */ 3554 jj = aij->j; 3555 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3556 olens = dlens + m; 3557 for (i=0; i<m; i++) { 3558 jend = ii[i+1] - ii[i]; 3559 olen = 0; 3560 dlen = 0; 3561 for (j=0; j<jend; j++) { 3562 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3563 else dlen++; 3564 jj++; 3565 } 3566 olens[i] = olen; 3567 dlens[i] = dlen; 3568 } 3569 3570 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3571 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3572 3573 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3574 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
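/* give M the block sizes of the row/column index sets and the type of mat, then preallocate it with the diagonal (dlens) and off-diagonal (olens) row lengths computed above */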
3575 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3576 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3577 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3578 ierr = PetscFree(dlens);CHKERRQ(ierr); 3579 3580 } else { /* call == MAT_REUSE_MATRIX */ 3581 M = *newmat; 3582 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3583 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3584 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3585 /* 3586 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3587 rather than the slower MatSetValues(). 3588 */ 3589 M->was_assembled = PETSC_TRUE; 3590 M->assembled = PETSC_FALSE; 3591 } 3592 3593 /* (5) Set values of Msub to *newmat */ 3594 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3595 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3596 3597 jj = aij->j; 3598 aa = aij->a; 3599 for (i=0; i<m; i++) { 3600 row = rstart + i; 3601 nz = ii[i+1] - ii[i]; 3602 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3603 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3604 jj += nz; aa += nz; 3605 } 3606 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3607 3608 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3609 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 3611 ierr = PetscFree(colsub);CHKERRQ(ierr); 3612 3613 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3614 if (call == MAT_INITIAL_MATRIX) { 3615 *newmat = M; 3616 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3617 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3618 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3620 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3621 3622 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3623 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3624 3625 if (iscol_local) { 3626 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3627 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3628 } 3629 } 3630 PetscFunctionReturn(0); 3631 } 3632 3633 /* 3634 Not great since it makes two copies of the submatrix, first an SeqAIJ 3635 in local and then by concatenating the local matrices the end result. 3636 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3637 3638 Note: This requires a sequential iscol with all indices. 
3639 */ 3640 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3641 { 3642 PetscErrorCode ierr; 3643 PetscMPIInt rank,size; 3644 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3645 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3646 Mat M,Mreuse; 3647 MatScalar *aa,*vwork; 3648 MPI_Comm comm; 3649 Mat_SeqAIJ *aij; 3650 PetscBool colflag,allcolumns=PETSC_FALSE; 3651 3652 PetscFunctionBegin; 3653 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3655 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3656 3657 /* Check for special case: each processor gets entire matrix columns */ 3658 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3659 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3660 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3661 3662 if (call == MAT_REUSE_MATRIX) { 3663 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3664 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3665 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3666 } else { 3667 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3668 } 3669 3670 /* 3671 m - number of local rows 3672 n - number of columns (same on all processors) 3673 rstart - first row in new global matrix generated 3674 */ 3675 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3676 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3677 if (call == MAT_INITIAL_MATRIX) { 3678 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3679 ii = aij->i; 3680 jj = aij->j; 3681 3682 /* 3683 Determine the number of non-zeros in the diagonal and off-diagonal 3684 portions of the matrix in order to do correct preallocation 3685 */ 3686 3687 /* first get start and end of "diagonal" columns */ 3688 if (csize == PETSC_DECIDE) { 3689 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3690 if (mglobal == n) { /* square matrix */ 3691 nlocal = m; 3692 } else { 3693 nlocal = n/size + ((n % size) > rank); 3694 } 3695 } else { 3696 nlocal = csize; 3697 } 3698 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3699 rstart = rend - nlocal; 3700 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3701 3702 /* next, compute all the lengths */ 3703 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3704 olens = dlens + m; 3705 for (i=0; i<m; i++) { 3706 jend = ii[i+1] - ii[i]; 3707 olen = 0; 3708 dlen = 0; 3709 for (j=0; j<jend; j++) { 3710 if (*jj < rstart || *jj >= rend) olen++; 3711 else dlen++; 3712 jj++; 3713 } 3714 olens[i] = olen; 3715 dlens[i] = dlen; 3716 } 3717 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3718 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3719 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3720 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3721 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3722 ierr = PetscFree(dlens);CHKERRQ(ierr); 3723 } else { 3724 PetscInt ml,nl; 3725 3726 M = *newmat; 3727 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3728 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3729 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3730 /* 3731 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3732 rather than the slower MatSetValues(). 3733 */ 3734 M->was_assembled = PETSC_TRUE; 3735 M->assembled = PETSC_FALSE; 3736 } 3737 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3738 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3739 ii = aij->i; 3740 jj = aij->j; 3741 aa = aij->a; 3742 for (i=0; i<m; i++) { 3743 row = rstart + i; 3744 nz = ii[i+1] - ii[i]; 3745 cwork = jj; jj += nz; 3746 vwork = aa; aa += nz; 3747 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3748 } 3749 3750 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3751 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 *newmat = M; 3753 3754 /* save submatrix used in processor for next request */ 3755 if (call == MAT_INITIAL_MATRIX) { 3756 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3757 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3758 } 3759 PetscFunctionReturn(0); 3760 } 3761 3762 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3763 { 3764 PetscInt m,cstart, cend,j,nnz,i,d; 3765 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3766 const PetscInt *JJ; 3767 PetscErrorCode ierr; 3768 PetscBool nooffprocentries; 3769 3770 PetscFunctionBegin; 3771 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3772 3773 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3774 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3775 m = B->rmap->n; 3776 cstart = B->cmap->rstart; 3777 cend = B->cmap->rend; 3778 rstart = B->rmap->rstart; 3779 3780 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3781 3782 if (PetscDefined(USE_DEBUG)) { 3783 for (i=0; i<m; i++) { 3784 nnz = Ii[i+1]- Ii[i]; 3785 JJ = J + Ii[i]; 3786 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3787 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3788 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3789 } 3790 } 3791 3792 for (i=0; i<m; i++) { 3793 nnz = Ii[i+1]- Ii[i]; 3794 JJ = J + Ii[i]; 3795 nnz_max = PetscMax(nnz_max,nnz); 3796 d = 0; 3797 for (j=0; j<nnz; j++) { 3798 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3799 } 3800 d_nnz[i] = d; 3801 o_nnz[i] = nnz - d; 3802 } 3803 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3804 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3805 3806 for (i=0; i<m; i++) { 3807 ii = i + rstart; 3808 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3809 } 3810 nooffprocentries = B->nooffprocentries; 3811 B->nooffprocentries = PETSC_TRUE; 3812 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3813 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3814 B->nooffprocentries = nooffprocentries; 3815 3816 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3817 PetscFunctionReturn(0); 3818 } 3819 3820 /*@ 3821 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3822 (the default parallel PETSc format). 
3823 3824 Collective 3825 3826 Input Parameters: 3827 + B - the matrix 3828 . i - the indices into j for the start of each local row (starts with zero) 3829 . j - the column indices for each local row (starts with zero) 3830 - v - optional values in the matrix 3831 3832 Level: developer 3833 3834 Notes: 3835 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3836 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3837 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3838 3839 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3840 3841 The format which is used for the sparse matrix input, is equivalent to a 3842 row-major ordering.. i.e for the following matrix, the input data expected is 3843 as shown 3844 3845 $ 1 0 0 3846 $ 2 0 3 P0 3847 $ ------- 3848 $ 4 5 6 P1 3849 $ 3850 $ Process0 [P0]: rows_owned=[0,1] 3851 $ i = {0,1,3} [size = nrow+1 = 2+1] 3852 $ j = {0,0,2} [size = 3] 3853 $ v = {1,2,3} [size = 3] 3854 $ 3855 $ Process1 [P1]: rows_owned=[2] 3856 $ i = {0,3} [size = nrow+1 = 1+1] 3857 $ j = {0,1,2} [size = 3] 3858 $ v = {4,5,6} [size = 3] 3859 3860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3861 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3862 @*/ 3863 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3864 { 3865 PetscErrorCode ierr; 3866 3867 PetscFunctionBegin; 3868 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3869 PetscFunctionReturn(0); 3870 } 3871 3872 /*@C 3873 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3874 (the default parallel PETSc format). For good matrix assembly performance 3875 the user should preallocate the matrix storage by setting the parameters 3876 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3877 performance can be increased by more than a factor of 50. 3878 3879 Collective 3880 3881 Input Parameters: 3882 + B - the matrix 3883 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3884 (same value is used for all local rows) 3885 . d_nnz - array containing the number of nonzeros in the various rows of the 3886 DIAGONAL portion of the local submatrix (possibly different for each row) 3887 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3888 The size of this array is equal to the number of local rows, i.e 'm'. 3889 For matrices that will be factored, you must leave room for (and set) 3890 the diagonal entry even if it is zero. 3891 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3892 submatrix (same value is used for all local rows). 3893 - o_nnz - array containing the number of nonzeros in the various rows of the 3894 OFF-DIAGONAL portion of the local submatrix (possibly different for 3895 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3896 structure. The size of this array is equal to the number 3897 of local rows, i.e 'm'. 
3898 3899 If the *_nnz parameter is given then the *_nz parameter is ignored. 3900 3901 The AIJ format (also called the Yale sparse matrix format or 3902 compressed row storage (CSR)) is fully compatible with standard Fortran 77 3903 storage. The stored row and column indices begin with zero. 3904 See Users-Manual: ch_mat for details. 3905 3906 The parallel matrix is partitioned such that the first m0 rows belong to 3907 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3908 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3909 3910 The DIAGONAL portion of the local submatrix of a processor can be defined 3911 as the submatrix which is obtained by extracting the part corresponding to 3912 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3913 first row that belongs to the processor, r2 is the last row belonging to 3914 this processor, and c1-c2 is the range of indices of the local part of a 3915 vector suitable for applying the matrix to. This is an mxn matrix. In the 3916 common case of a square matrix, the row and column ranges are the same and 3917 the DIAGONAL part is also square. The remaining portion of the local 3918 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 3919 3920 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3921 3922 You can call MatGetInfo() to get information on how effective the preallocation was; 3923 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3924 You can also run with the option -info and look for messages with the string 3925 malloc in them to see if additional memory allocation was needed. 3926 3927 Example usage: 3928 3929 Consider the following 8x8 matrix with 34 non-zero values, that is 3930 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 3931 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3932 as follows: 3933 3934 .vb 3935 1 2 0 | 0 3 0 | 0 4 3936 Proc0 0 5 6 | 7 0 0 | 8 0 3937 9 0 10 | 11 0 0 | 12 0 3938 ------------------------------------- 3939 13 0 14 | 15 16 17 | 0 0 3940 Proc1 0 18 0 | 19 20 21 | 0 0 3941 0 0 0 | 22 23 0 | 24 0 3942 ------------------------------------- 3943 Proc2 25 26 27 | 0 0 28 | 29 0 3944 30 0 0 | 31 32 33 | 0 34 3945 .ve 3946 3947 This can be represented as a collection of submatrices as: 3948 3949 .vb 3950 A B C 3951 D E F 3952 G H I 3953 .ve 3954 3955 Where the submatrices A,B,C are owned by proc0, D,E,F are 3956 owned by proc1, G,H,I are owned by proc2. 3957 3958 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3959 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3960 The 'M','N' parameters are 8,8, and have the same values on all procs. 3961 3962 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3963 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3964 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3965 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3966 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 3967 matrix, and [DF] as another SeqAIJ matrix. 3968 3969 When d_nz, o_nz parameters are specified, d_nz storage elements are 3970 allocated for every row of the local diagonal submatrix, and o_nz 3971 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3972 One way to choose d_nz and o_nz is to use the max nonzeros per local 3973 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 
3974 In this case, the values of d_nz,o_nz are: 3975 .vb 3976 proc0 : dnz = 2, o_nz = 2 3977 proc1 : dnz = 3, o_nz = 2 3978 proc2 : dnz = 1, o_nz = 4 3979 .ve 3980 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3981 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3982 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3983 34 values. 3984 3985 When d_nnz, o_nnz parameters are specified, the storage is specified 3986 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3987 In the above case the values for d_nnz,o_nnz are: 3988 .vb 3989 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3990 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3991 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3992 .ve 3993 Here the space allocated is the sum of all the above values, i.e. 34, and 3994 hence the preallocation is perfect. 3995 3996 Level: intermediate 3997 3998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3999 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4000 @*/ 4001 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4002 { 4003 PetscErrorCode ierr; 4004 4005 PetscFunctionBegin; 4006 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4007 PetscValidType(B,1); 4008 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4009 PetscFunctionReturn(0); 4010 } 4011 4012 /*@ 4013 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4014 CSR format. 4015 4016 Collective 4017 4018 Input Parameters: 4019 + comm - MPI communicator 4020 . m - number of local rows (Cannot be PETSC_DECIDE) 4021 . n - This value should be the same as the local size used in creating the 4022 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4023 calculated if N is given) For square matrices n is almost always m. 4024 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4025 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4026 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4027 . j - column indices 4028 - a - matrix values 4029 4030 Output Parameter: 4031 . mat - the matrix 4032 4033 Level: intermediate 4034 4035 Notes: 4036 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4037 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4038 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4039 4040 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4041 4042 The format used for the sparse matrix input is equivalent to a 4043 row-major ordering; 
i.e., for the following matrix, the input data expected is 4044 as shown 4045 4046 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays() 4047 4048 $ 1 0 0 4049 $ 2 0 3 P0 4050 $ ------- 4051 $ 4 5 6 P1 4052 $ 4053 $ Process0 [P0]: rows_owned=[0,1] 4054 $ i = {0,1,3} [size = nrow+1 = 2+1] 4055 $ j = {0,0,2} [size = 3] 4056 $ v = {1,2,3} [size = 3] 4057 $ 4058 $ Process1 [P1]: rows_owned=[2] 4059 $ i = {0,3} [size = nrow+1 = 1+1] 4060 $ j = {0,1,2} [size = 3] 4061 $ v = {4,5,6} [size = 3] 4062 4063 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4064 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4065 @*/ 4066 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4067 { 4068 PetscErrorCode ierr; 4069 4070 PetscFunctionBegin; 4071 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4072 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4073 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4074 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4075 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4076 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4077 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4078 PetscFunctionReturn(0); 4079 } 4080 4081 /*@ 4082 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4083 CSR format. Only the numerical values are updated; the other arrays must be identical 4084 4085 Collective 4086 4087 Input Parameters: 4088 + mat - the matrix 4089 . m - number of local rows (Cannot be PETSC_DECIDE) 4090 . n - This value should be the same as the local size used in creating the 4091 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4092 calculated if N is given) For square matrices n is almost always m. 4093 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4094 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4095 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4096 . 
J - column indices 4097 - v - matrix values 4098 4099 Level: intermediate 4100 4101 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4102 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4103 @*/ 4104 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4105 { 4106 PetscErrorCode ierr; 4107 PetscInt cstart,nnz,i,j; 4108 PetscInt *ld; 4109 PetscBool nooffprocentries; 4110 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4111 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4112 PetscScalar *ad = Ad->a, *ao = Ao->a; 4113 const PetscInt *Adi = Ad->i; 4114 PetscInt ldi,Iii,md; 4115 4116 PetscFunctionBegin; 4117 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4118 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4119 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4120 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4121 4122 cstart = mat->cmap->rstart; 4123 if (!Aij->ld) { 4124 /* count number of entries below block diagonal */ 4125 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4126 Aij->ld = ld; 4127 for (i=0; i<m; i++) { 4128 nnz = Ii[i+1]- Ii[i]; 4129 j = 0; 4130 while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[j] is never read past the end of the row */ 4131 J += nnz; 4132 ld[i] = j; 4133 } 4134 } else { 4135 ld = Aij->ld; 4136 } 4137 4138 for (i=0; i<m; i++) { 4139 nnz = Ii[i+1]- Ii[i]; 4140 Iii = Ii[i]; 4141 ldi = ld[i]; 4142 md = Adi[i+1]-Adi[i]; 4143 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4144 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4145 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4146 ad += md; 4147 ao += nnz - md; 4148 } 4149 nooffprocentries = mat->nooffprocentries; 4150 mat->nooffprocentries = PETSC_TRUE; 4151 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4152 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4153 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4154 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4155 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4156 mat->nooffprocentries = nooffprocentries; 4157 PetscFunctionReturn(0); 4158 } 4159 4160 /*@C 4161 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4162 (the default parallel PETSc format). For good matrix assembly performance 4163 the user should preallocate the matrix storage by setting the parameters 4164 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4165 performance can be increased by more than a factor of 50. 4166 4167 Collective 4168 4169 Input Parameters: 4170 + comm - MPI communicator 4171 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4172 This value should be the same as the local size used in creating the 4173 y vector for the matrix-vector product y = Ax. 4174 . n - This value should be the same as the local size used in creating the 4175 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4176 calculated if N is given) For square matrices n is almost always m. 4177 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4178 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4179 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4180 (same value is used for all local rows) 4181 . d_nnz - array containing the number of nonzeros in the various rows of the 4182 DIAGONAL portion of the local submatrix (possibly different for each row) 4183 or NULL, if d_nz is used to specify the nonzero structure. 4184 The size of this array is equal to the number of local rows, i.e 'm'. 4185 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4186 submatrix (same value is used for all local rows). 4187 - o_nnz - array containing the number of nonzeros in the various rows of the 4188 OFF-DIAGONAL portion of the local submatrix (possibly different for 4189 each row) or NULL, if o_nz is used to specify the nonzero 4190 structure. The size of this array is equal to the number 4191 of local rows, i.e 'm'. 4192 4193 Output Parameter: 4194 . A - the matrix 4195 4196 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4197 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4198 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4199 4200 Notes: 4201 If the *_nnz parameter is given then the *_nz parameter is ignored. 4202 4203 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4204 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4205 storage requirements for this matrix. 4206 4207 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4208 processor then it must be used on all processors that share the object for 4209 that argument. 4210 4211 The user MUST specify either the local or global matrix dimensions 4212 (possibly both). 4213 4214 The parallel matrix is partitioned across processors such that the 4215 first m0 rows belong to process 0, the next m1 rows belong to 4216 process 1, the next m2 rows belong to process 2 etc.. where 4217 m0,m1,m2,.. are the input parameter 'm'. i.e. each processor stores 4218 the values corresponding to an [m x N] submatrix. 4219 4220 The columns are logically partitioned with the n0 columns belonging 4221 to 0th partition, the next n1 columns belonging to the next 4222 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4223 4224 The DIAGONAL portion of the local submatrix on any given processor 4225 is the submatrix corresponding to the rows and columns m,n 4226 corresponding to the given processor. i.e. the diagonal matrix on 4227 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1] 4228 etc. The remaining portion of the local submatrix [m x (N-n)] 4229 constitutes the OFF-DIAGONAL portion. The example below better 4230 illustrates this concept. 4231 4232 For a square global matrix we define each processor's diagonal portion 4233 to be its local rows and the corresponding columns (a square submatrix); 4234 each processor's off-diagonal portion encompasses the remainder of the 4235 local matrix (a rectangular submatrix). 4236 4237 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4238 4239 When calling this routine with a single process communicator, a matrix of 4240 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4241 type of communicator, use the construction mechanism 4242 .vb 4243 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4244 .ve 4250 4251 By default, this format uses inodes (identical nodes) when possible. 4252 We search for consecutive rows with the same nonzero structure, thereby 4253 reusing matrix information to achieve increased efficiency. 4254 4255 Options Database Keys: 4256 + -mat_no_inode - Do not use inodes 4257 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4258 4259 4260 4261 Example usage: 4262 4263 Consider the following 8x8 matrix with 34 non-zero values, that is 4264 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4265 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4266 as follows: 4267 4268 .vb 4269 1 2 0 | 0 3 0 | 0 4 4270 Proc0 0 5 6 | 7 0 0 | 8 0 4271 9 0 10 | 11 0 0 | 12 0 4272 ------------------------------------- 4273 13 0 14 | 15 16 17 | 0 0 4274 Proc1 0 18 0 | 19 20 21 | 0 0 4275 0 0 0 | 22 23 0 | 24 0 4276 ------------------------------------- 4277 Proc2 25 26 27 | 0 0 28 | 29 0 4278 30 0 0 | 31 32 33 | 0 34 4279 .ve 4280 4281 This can be represented as a collection of submatrices as: 4282 4283 .vb 4284 A B C 4285 D E F 4286 G H I 4287 .ve 4288 4289 Where the submatrices A,B,C are owned by proc0, D,E,F are 4290 owned by proc1, G,H,I are owned by proc2. 4291 4292 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4293 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4294 The 'M','N' parameters are 8,8, and have the same values on all procs. 4295 4296 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4297 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4298 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4299 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4300 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4301 matrix, and [DF] as another SeqAIJ matrix. 4302 4303 When d_nz, o_nz parameters are specified, d_nz storage elements are 4304 allocated for every row of the local diagonal submatrix, and o_nz 4305 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4306 One way to choose d_nz and o_nz is to use the max nonzeros per local 4307 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4308 In this case, the values of d_nz,o_nz are: 4309 .vb 4310 proc0 : dnz = 2, o_nz = 2 4311 proc1 : dnz = 3, o_nz = 2 4312 proc2 : dnz = 1, o_nz = 4 4313 .ve 4314 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4315 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4316 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4317 34 values. 4318 4319 When d_nnz, o_nnz parameters are specified, the storage is specified 4320 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4321 In the above case the values for d_nnz,o_nnz are: 4322 .vb 4323 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4324 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4325 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4326 .ve 4327 Here the space allocated is the sum of all the above values, i.e. 34, and 4328 hence the preallocation is perfect. 
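   As a concrete sketch (illustration only), proc0 in the example above could create its share of the matrix with
.vb
     PetscInt d_nnz[3] = {2,2,2}, o_nnz[3] = {2,2,2};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve
   followed by MatSetValues() for its locally owned rows and MatAssemblyBegin()/MatAssemblyEnd() on all processes.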

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix

  Not collective

  Input Parameter:
. A - The MPIAIJ matrix

  Output Parameters:
+ Ad - The local diagonal block as a SeqAIJ matrix
. Ao - The local off-diagonal block as a SeqAIJ matrix
- colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

  Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
  local column numbers to global column numbers in the original matrix.
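
  A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
     /* Ad, Ao and colmap are internal to A and must not be destroyed or freed by the caller */
.ve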
4371 4372 Level: intermediate 4373 4374 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4375 @*/ 4376 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4377 { 4378 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4379 PetscBool flg; 4380 PetscErrorCode ierr; 4381 4382 PetscFunctionBegin; 4383 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4384 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4385 if (Ad) *Ad = a->A; 4386 if (Ao) *Ao = a->B; 4387 if (colmap) *colmap = a->garray; 4388 PetscFunctionReturn(0); 4389 } 4390 4391 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4392 { 4393 PetscErrorCode ierr; 4394 PetscInt m,N,i,rstart,nnz,Ii; 4395 PetscInt *indx; 4396 PetscScalar *values; 4397 4398 PetscFunctionBegin; 4399 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4400 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4401 PetscInt *dnz,*onz,sum,bs,cbs; 4402 4403 if (n == PETSC_DECIDE) { 4404 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4405 } 4406 /* Check sum(n) = N */ 4407 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4408 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4409 4410 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 rstart -= m; 4412 4413 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4414 for (i=0; i<m; i++) { 4415 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4416 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4417 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 } 4419 4420 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4421 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4422 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4423 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4424 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4425 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4426 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4427 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4428 } 4429 4430 /* numeric phase */ 4431 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4432 for (i=0; i<m; i++) { 4433 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4434 Ii = i + rstart; 4435 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4436 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4437 } 4438 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4439 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 PetscFunctionReturn(0); 4441 } 4442 4443 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4444 { 4445 PetscErrorCode ierr; 4446 PetscMPIInt rank; 4447 PetscInt m,N,i,rstart,nnz; 4448 size_t len; 4449 const PetscInt *indx; 4450 PetscViewer out; 4451 char *name; 4452 Mat B; 4453 const PetscScalar *values; 4454 4455 PetscFunctionBegin; 4456 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4457 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4458 /* Should this be the type of the diagonal block of A? 
*/ 4459 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4460 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4461 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4462 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4463 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4464 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4465 for (i=0; i<m; i++) { 4466 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4467 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4468 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4469 } 4470 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4471 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 4473 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4474 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4475 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4476 sprintf(name,"%s.%d",outfile,rank); 4477 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4478 ierr = PetscFree(name);CHKERRQ(ierr); 4479 ierr = MatView(B,out);CHKERRQ(ierr); 4480 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4481 ierr = MatDestroy(&B);CHKERRQ(ierr); 4482 PetscFunctionReturn(0); 4483 } 4484 4485 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4486 { 4487 PetscErrorCode ierr; 4488 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4489 4490 PetscFunctionBegin; 4491 if (!merge) PetscFunctionReturn(0); 4492 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4493 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4494 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4495 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4504 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4505 ierr = PetscFree(merge);CHKERRQ(ierr); 4506 PetscFunctionReturn(0); 4507 } 4508 4509 #include <../src/mat/utils/freespace.h> 4510 #include <petscbt.h> 4511 4512 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4513 { 4514 PetscErrorCode ierr; 4515 MPI_Comm comm; 4516 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4517 PetscMPIInt size,rank,taga,*len_s; 4518 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4519 PetscInt proc,m; 4520 PetscInt **buf_ri,**buf_rj; 4521 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4522 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4523 MPI_Request *s_waits,*r_waits; 4524 MPI_Status *status; 4525 MatScalar *aa=a->a; 4526 MatScalar **abuf_r,*ba_i; 4527 Mat_Merge_SeqsToMPI *merge; 4528 PetscContainer container; 4529 4530 PetscFunctionBegin; 4531 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4532 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4533 4534 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4535 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4536 4537 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4538 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4539 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4540 4541 bi = merge->bi; 4542 bj = merge->bj; 4543 buf_ri = merge->buf_ri; 4544 buf_rj = merge->buf_rj; 4545 4546 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4547 owners = merge->rowmap->range; 4548 len_s = merge->len_s; 4549 4550 /* send and recv matrix values */ 4551 /*-----------------------------*/ 4552 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4553 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4554 4555 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4556 for (proc=0,k=0; proc<size; proc++) { 4557 if (!len_s[proc]) continue; 4558 i = owners[proc]; 4559 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4560 k++; 4561 } 4562 4563 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4564 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4565 ierr = PetscFree(status);CHKERRQ(ierr); 4566 4567 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4568 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4569 4570 /* insert mat values of mpimat */ 4571 /*----------------------------*/ 4572 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4573 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4574 4575 for (k=0; k<merge->nrecv; k++) { 4576 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4577 nrows = *(buf_ri_k[k]); 4578 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4579 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4580 } 4581 4582 /* set values of ba */ 4583 m = merge->rowmap->n; 4584 for (i=0; i<m; i++) { 4585 arow = owners[rank] + i; 4586 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4587 bnzi = bi[i+1] - bi[i]; 4588 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4589 4590 /* add local non-zero vals of this proc's seqmat into ba */ 4591 anzi = ai[arow+1] - ai[arow]; 4592 aj = a->j + ai[arow]; 4593 aa = a->a + ai[arow]; 4594 nextaj = 0; 4595 for (j=0; nextaj<anzi; j++) { 4596 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4597 ba_i[j] += aa[nextaj++]; 4598 } 4599 } 4600 4601 /* add received vals into ba */ 4602 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4603 /* i-th row */ 4604 if (i == *nextrow[k]) { 4605 anzi = *(nextai[k]+1) - *nextai[k]; 4606 aj = buf_rj[k] + *(nextai[k]); 4607 aa = abuf_r[k] + *(nextai[k]); 4608 nextaj = 0; 4609 for (j=0; nextaj<anzi; j++) { 4610 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4611 ba_i[j] += aa[nextaj++]; 4612 } 4613 } 4614 nextrow[k]++; nextai[k]++; 4615 } 4616 } 4617 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4618 } 4619 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4620 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4621 4622 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4623 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4624 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4625 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4626 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4627 PetscFunctionReturn(0); 4628 } 4629 4630 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4631 { 4632 PetscErrorCode ierr; 4633 Mat B_mpi; 4634 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4635 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4636 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4637 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4638 PetscInt len,proc,*dnz,*onz,bs,cbs; 4639 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4640 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4641 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4642 MPI_Status *status; 4643 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4644 PetscBT lnkbt; 4645 Mat_Merge_SeqsToMPI *merge; 4646 PetscContainer container; 4647 4648 PetscFunctionBegin; 4649 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4650 4651 /* make sure it is a PETSc comm */ 4652 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4653 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4654 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4655 4656 ierr = PetscNew(&merge);CHKERRQ(ierr); 4657 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4658 4659 /* determine row ownership */ 4660 /*---------------------------------------------------------*/ 4661 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4662 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4663 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4664 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4667 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4668 4669 m = merge->rowmap->n; 4670 owners = merge->rowmap->range; 4671 4672 /* determine the number of messages to send, their lengths */ 4673 /*---------------------------------------------------------*/ 4674 len_s = merge->len_s; 4675 4676 len = 0; /* length of buf_si[] */ 4677 merge->nsend = 0; 4678 for (proc=0; proc<size; proc++) { 4679 len_si[proc] = 0; 4680 if (proc == rank) { 4681 len_s[proc] = 0; 4682 } else { 4683 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4684 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4685 } 4686 if (len_s[proc]) { 4687 merge->nsend++; 4688 nrows = 0; 4689 for (i=owners[proc]; i<owners[proc+1]; i++) { 4690 if (ai[i+1] > ai[i]) nrows++; 4691 } 4692 len_si[proc] = 2*(nrows+1); 4693 len += len_si[proc]; 4694 } 4695 } 4696 4697 /* determine the number and length of messages to receive for ij-structure */ 4698 /*-------------------------------------------------------------------------*/ 4699 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4700 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4701 4702 /* post the Irecv of j-structure */ 4703 /*-------------------------------*/ 4704 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4705 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4706 4707 /* post the Isend of j-structure */ 4708 /*--------------------------------*/ 4709 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4710 4711 for (proc=0, k=0; proc<size; proc++) { 4712 if (!len_s[proc]) continue; 4713 i = owners[proc]; 4714 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4715 k++; 4716 } 4717 4718 /* receives and sends of j-structure are complete */ 4719 /*------------------------------------------------*/ 
4720 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4721 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4722 4723 /* send and recv i-structure */ 4724 /*---------------------------*/ 4725 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4726 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4727 4728 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4729 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4730 for (proc=0,k=0; proc<size; proc++) { 4731 if (!len_s[proc]) continue; 4732 /* form outgoing message for i-structure: 4733 buf_si[0]: nrows to be sent 4734 [1:nrows]: row index (global) 4735 [nrows+1:2*nrows+1]: i-structure index 4736 */ 4737 /*-------------------------------------------*/ 4738 nrows = len_si[proc]/2 - 1; 4739 buf_si_i = buf_si + nrows+1; 4740 buf_si[0] = nrows; 4741 buf_si_i[0] = 0; 4742 nrows = 0; 4743 for (i=owners[proc]; i<owners[proc+1]; i++) { 4744 anzi = ai[i+1] - ai[i]; 4745 if (anzi) { 4746 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4747 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4748 nrows++; 4749 } 4750 } 4751 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4752 k++; 4753 buf_si += len_si[proc]; 4754 } 4755 4756 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4757 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4758 4759 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4760 for (i=0; i<merge->nrecv; i++) { 4761 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4762 } 4763 4764 ierr = PetscFree(len_si);CHKERRQ(ierr); 4765 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4766 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4767 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4768 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4769 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4770 ierr = PetscFree(status);CHKERRQ(ierr); 4771 4772 /* compute a local seq matrix in each processor */ 4773 /*----------------------------------------------*/ 4774 /* allocate bi array and free space for accumulating nonzero column info */ 4775 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4776 bi[0] = 0; 4777 4778 /* create and initialize a linked list */ 4779 nlnk = N+1; 4780 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4781 4782 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4783 len = ai[owners[rank+1]] - ai[owners[rank]]; 4784 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4785 4786 current_space = free_space; 4787 4788 /* determine symbolic info for each local row */ 4789 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4790 4791 for (k=0; k<merge->nrecv; k++) { 4792 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4793 nrows = *buf_ri_k[k]; 4794 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4795 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4796 } 4797 4798 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4799 len = 0; 4800 for (i=0; i<m; i++) { 4801 bnzi = 0; 4802 /* add local non-zero cols of this proc's seqmat into lnk */ 4803 arow = owners[rank] + i; 4804 anzi = 
ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

   Collective

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrices
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

   Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize()

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    A_loc - the local sequential matrix generated

    Level: developer

   Notes:
     When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
     If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
     This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
     modify the values of the returned A_loc.
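
   A possible reuse pattern, assuming the nonzero structure of A is unchanged between calls
   (a sketch only; error checking omitted):
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);  /* first call creates A_loc */
     /* ... update the numerical values of A ... */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);    /* later calls refresh the values of A_loc */
     MatDestroy(&A_loc);
.ve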
4951 4952 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4953 4954 @*/ 4955 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4956 { 4957 PetscErrorCode ierr; 4958 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4959 Mat_SeqAIJ *mat,*a,*b; 4960 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4961 MatScalar *aa,*ba,*cam; 4962 PetscScalar *ca; 4963 PetscMPIInt size; 4964 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4965 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4966 PetscBool match; 4967 4968 PetscFunctionBegin; 4969 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4970 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4971 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4972 if (size == 1) { 4973 if (scall == MAT_INITIAL_MATRIX) { 4974 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4975 *A_loc = mpimat->A; 4976 } else if (scall == MAT_REUSE_MATRIX) { 4977 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4978 } 4979 PetscFunctionReturn(0); 4980 } 4981 4982 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4983 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4984 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4985 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4986 aa = a->a; ba = b->a; 4987 if (scall == MAT_INITIAL_MATRIX) { 4988 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4989 ci[0] = 0; 4990 for (i=0; i<am; i++) { 4991 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4992 } 4993 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4994 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4995 k = 0; 4996 for (i=0; i<am; i++) { 4997 ncols_o = bi[i+1] - bi[i]; 4998 ncols_d = ai[i+1] - ai[i]; 4999 /* off-diagonal portion of A */ 5000 for (jo=0; jo<ncols_o; jo++) { 5001 col = cmap[*bj]; 5002 if (col >= cstart) break; 5003 cj[k] = col; bj++; 5004 ca[k++] = *ba++; 5005 } 5006 /* diagonal portion of A */ 5007 for (j=0; j<ncols_d; j++) { 5008 cj[k] = cstart + *aj++; 5009 ca[k++] = *aa++; 5010 } 5011 /* off-diagonal portion of A */ 5012 for (j=jo; j<ncols_o; j++) { 5013 cj[k] = cmap[*bj++]; 5014 ca[k++] = *ba++; 5015 } 5016 } 5017 /* put together the new matrix */ 5018 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5019 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5020 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5021 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5022 mat->free_a = PETSC_TRUE; 5023 mat->free_ij = PETSC_TRUE; 5024 mat->nonew = 0; 5025 } else if (scall == MAT_REUSE_MATRIX) { 5026 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5027 ci = mat->i; cj = mat->j; cam = mat->a; 5028 for (i=0; i<am; i++) { 5029 /* off-diagonal portion of A */ 5030 ncols_o = bi[i+1] - bi[i]; 5031 for (jo=0; jo<ncols_o; jo++) { 5032 col = cmap[*bj]; 5033 if (col >= cstart) break; 5034 *cam++ = *ba++; bj++; 5035 } 5036 /* diagonal portion of A */ 5037 ncols_d = ai[i+1] - ai[i]; 5038 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5039 /* off-diagonal portion of A */ 5040 for (j=jo; j<ncols_o; j++) { 5041 *cam++ = *ba++; bj++; 5042 } 5043 } 5044 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5045 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5046 PetscFunctionReturn(0); 5047 } 5048 5049 /*@C 5050 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5051 5052 Not Collective 5053 5054 Input Parameters: 5055 + A - the matrix 5056 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5057 - row, col - index sets of rows and columns to extract (or NULL) 5058 5059 Output Parameter: 5060 . A_loc - the local sequential matrix generated 5061 5062 Level: developer 5063 5064 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5065 5066 @*/ 5067 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5068 { 5069 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5070 PetscErrorCode ierr; 5071 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5072 IS isrowa,iscola; 5073 Mat *aloc; 5074 PetscBool match; 5075 5076 PetscFunctionBegin; 5077 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5078 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5079 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5080 if (!row) { 5081 start = A->rmap->rstart; end = A->rmap->rend; 5082 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5083 } else { 5084 isrowa = *row; 5085 } 5086 if (!col) { 5087 start = A->cmap->rstart; 5088 cmap = a->garray; 5089 nzA = a->A->cmap->n; 5090 nzB = a->B->cmap->n; 5091 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5092 ncols = 0; 5093 for (i=0; i<nzB; i++) { 5094 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5095 else break; 5096 } 5097 imark = i; 5098 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5099 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5100 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5101 } else { 5102 iscola = *col; 5103 } 5104 if (scall != MAT_INITIAL_MATRIX) { 5105 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5106 aloc[0] = *A_loc; 5107 } 5108 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5109 if (!col) { /* attach global id of condensed columns */ 5110 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5111 } 5112 *A_loc = aloc[0]; 5113 ierr = PetscFree(aloc);CHKERRQ(ierr); 5114 if (!row) { 5115 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5116 } 5117 if (!col) { 5118 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5119 } 5120 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5121 PetscFunctionReturn(0); 5122 } 5123 5124 /* 5125 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5126 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5127 * on a global size. 5128 * */ 5129 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5130 { 5131 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5132 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5133 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5134 PetscMPIInt owner; 5135 PetscSFNode *iremote,*oiremote; 5136 const PetscInt *lrowindices; 5137 PetscErrorCode ierr; 5138 PetscSF sf,osf; 5139 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5140 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5141 MPI_Comm comm; 5142 ISLocalToGlobalMapping mapping; 5143 5144 PetscFunctionBegin; 5145 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5146 /* plocalsize is the number of roots 5147 * nrows is the number of leaves 5148 * */ 5149 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5150 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5151 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5152 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5153 for (i=0;i<nrows;i++) { 5154 /* Find a remote index and an owner for a row 5155 * The row could be local or remote 5156 * */ 5157 owner = 0; 5158 lidx = 0; 5159 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5160 iremote[i].index = lidx; 5161 iremote[i].rank = owner; 5162 } 5163 /* Create SF to communicate how many nonzero columns for each row */ 5164 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5165 /* SF will figure out the number of nonzero colunms for each row, and their 5166 * offsets 5167 * */ 5168 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5169 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5170 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5171 5172 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5173 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5174 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5175 roffsets[0] = 0; 5176 roffsets[1] = 0; 5177 for (i=0;i<plocalsize;i++) { 5178 /* diag */ 5179 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5180 /* off diag */ 5181 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5182 /* compute offsets so that we relative location for each row */ 5183 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5184 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5185 } 5186 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5187 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5188 /* 'r' means root, and 'l' means leaf */ 5189 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5190 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5191 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5192 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5193 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5194 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5195 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5196 dntotalcols = 0; 5197 ontotalcols = 0; 5198 ncol = 0; 5199 for (i=0;i<nrows;i++) { 5200 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5201 ncol = PetscMax(pnnz[i],ncol); 5202 /* diag */ 5203 dntotalcols += nlcols[i*2+0]; 5204 /* off diag */ 5205 ontotalcols += nlcols[i*2+1]; 5206 } 5207 /* We do not need to figure the right number of columns 5208 * since all the 
calculations will be done by going through the raw data 5209 * */ 5210 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5211 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5212 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5213 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5214 /* diag */ 5215 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5216 /* off diag */ 5217 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5218 /* diag */ 5219 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5220 /* off diag */ 5221 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5222 dntotalcols = 0; 5223 ontotalcols = 0; 5224 ntotalcols = 0; 5225 for (i=0;i<nrows;i++) { 5226 owner = 0; 5227 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5228 /* Set iremote for diag matrix */ 5229 for (j=0;j<nlcols[i*2+0];j++) { 5230 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5231 iremote[dntotalcols].rank = owner; 5232 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5233 ilocal[dntotalcols++] = ntotalcols++; 5234 } 5235 /* off diag */ 5236 for (j=0;j<nlcols[i*2+1];j++) { 5237 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5238 oiremote[ontotalcols].rank = owner; 5239 oilocal[ontotalcols++] = ntotalcols++; 5240 } 5241 } 5242 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5243 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5244 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5245 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5246 /* P serves as roots and P_oth is leaves 5247 * Diag matrix 5248 * */ 5249 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5250 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5251 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5252 5253 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5254 /* Off diag */ 5255 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5256 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5257 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5258 /* We operate on the matrix internal data for saving memory */ 5259 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5260 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5261 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5262 /* Convert to global indices for diag matrix */ 5263 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5264 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5265 /* We want P_oth store global indices */ 5266 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5267 /* Use memory scalable approach */ 5268 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5269 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5270 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5271 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5272 /* Convert back to local indices */ 5273 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5274 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5275 nout = 0; 5276 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5277 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5278 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5279 /* Exchange values */ 5280 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5281 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5282 /* Stop PETSc from shrinking memory */ 5283 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5284 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5285 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5286 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5287 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5288 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5289 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5290 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5291 PetscFunctionReturn(0); 5292 } 5293 5294 /* 5295 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5296 * This supports MPIAIJ and MAIJ 5297 * */ 5298 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5299 { 5300 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5301 Mat_SeqAIJ *p_oth; 5302 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5303 IS rows,map; 5304 PetscHMapI hamp; 5305 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5306 MPI_Comm comm; 5307 PetscSF sf,osf; 5308 PetscBool has; 5309 PetscErrorCode ierr; 5310 5311 PetscFunctionBegin; 5312 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5313 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5314 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5315 * and then create a submatrix (that often is an overlapping matrix) 5316 * */ 5317 if (reuse == MAT_INITIAL_MATRIX) { 5318 /* Use a hash table to figure out unique keys */ 5319 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5320 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5321 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5322 count = 0; 5323 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5324 for (i=0;i<a->B->cmap->n;i++) { 5325 key = a->garray[i]/dof; 5326 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5327 if (!has) { 5328 mapping[i] = count; 5329 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5330 } else { 5331 /* Current 'i' has the same value the previous step */ 5332 mapping[i] = count-1; 5333 } 5334 } 5335 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5336 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5337 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5338 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5339 off = 0; 5340 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5341 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5342 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5343 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5344 /* In case, the matrix was already created but users want to recreate the matrix */ 5345 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5346 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5347 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5348 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5349 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5350 } else if (reuse == MAT_REUSE_MATRIX) { 5351 /* If matrix was already created, we simply update values using SF objects 5352 * that as attached to the matrix ealier. 5353 * */ 5354 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5355 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5356 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5357 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5358 /* Update values in place */ 5359 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5360 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5361 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5362 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5363 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5364 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5365 PetscFunctionReturn(0); 5366 } 5367 5368 /*@C 5369 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5370 5371 Collective on Mat 5372 5373 Input Parameters: 5374 + A,B - the matrices in mpiaij format 5375 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5376 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5377 5378 Output Parameter: 5379 + rowb, colb - index sets of rows and columns of B to extract 5380 - B_seq - the sequential matrix generated 5381 5382 Level: developer 5383 5384 @*/ 5385 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5386 { 5387 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5388 PetscErrorCode ierr; 5389 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5390 IS isrowb,iscolb; 5391 Mat *bseq=NULL; 5392 5393 PetscFunctionBegin; 5394 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5395 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5396 } 5397 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5398 5399 if (scall == MAT_INITIAL_MATRIX) { 5400 start = A->cmap->rstart; 5401 cmap = a->garray; 5402 nzA = a->A->cmap->n; 5403 nzB = a->B->cmap->n; 5404 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5405 ncols = 0; 5406 for (i=0; i<nzB; i++) { /* row < local row index */ 5407 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5408 else break; 5409 } 5410 imark = i; 5411 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5412 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5413 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5414 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5415 } else { 5416 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5417 isrowb = *rowb; iscolb = *colb; 5418 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5419 bseq[0] = *B_seq; 5420 } 5421 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5422 *B_seq = bseq[0]; 5423 ierr = PetscFree(bseq);CHKERRQ(ierr); 5424 if (!rowb) { 5425 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5426 } else { 5427 *rowb = isrowb; 5428 } 5429 if (!colb) { 5430 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5431 } 
else { 5432 *colb = iscolb; 5433 } 5434 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5435 PetscFunctionReturn(0); 5436 } 5437 5438 /* 5439 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5440 of the OFF-DIAGONAL portion of local A 5441 5442 Collective on Mat 5443 5444 Input Parameters: 5445 + A,B - the matrices in mpiaij format 5446 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5447 5448 Output Parameter: 5449 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5450 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5451 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5452 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5453 5454 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5455 for this matrix. This is not desirable.. 5456 5457 Level: developer 5458 5459 */ 5460 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5461 { 5462 PetscErrorCode ierr; 5463 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5464 Mat_SeqAIJ *b_oth; 5465 VecScatter ctx; 5466 MPI_Comm comm; 5467 const PetscMPIInt *rprocs,*sprocs; 5468 const PetscInt *srow,*rstarts,*sstarts; 5469 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5470 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5471 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5472 MPI_Request *rwaits = NULL,*swaits = NULL; 5473 MPI_Status rstatus; 5474 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5475 5476 PetscFunctionBegin; 5477 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5478 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5479 5480 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5481 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5482 } 5483 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5484 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5485 5486 if (size == 1) { 5487 startsj_s = NULL; 5488 bufa_ptr = NULL; 5489 *B_oth = NULL; 5490 PetscFunctionReturn(0); 5491 } 5492 5493 ctx = a->Mvctx; 5494 tag = ((PetscObject)ctx)->tag; 5495 5496 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5497 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5498 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5499 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5500 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5501 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5502 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5503 5504 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5505 if (scall == MAT_INITIAL_MATRIX) { 5506 /* i-array */ 5507 /*---------*/ 5508 /* post receives */ 5509 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL 
when nrecvs=0 */ 5510 for (i=0; i<nrecvs; i++) { 5511 rowlen = rvalues + rstarts[i]*rbs; 5512 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5513 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5514 } 5515 5516 /* pack the outgoing message */ 5517 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5518 5519 sstartsj[0] = 0; 5520 rstartsj[0] = 0; 5521 len = 0; /* total length of j or a array to be sent */ 5522 if (nsends) { 5523 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5524 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5525 } 5526 for (i=0; i<nsends; i++) { 5527 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5528 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5529 for (j=0; j<nrows; j++) { 5530 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5531 for (l=0; l<sbs; l++) { 5532 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5533 5534 rowlen[j*sbs+l] = ncols; 5535 5536 len += ncols; 5537 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5538 } 5539 k++; 5540 } 5541 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5542 5543 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5544 } 5545 /* recvs and sends of i-array are completed */ 5546 i = nrecvs; 5547 while (i--) { 5548 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5549 } 5550 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5551 ierr = PetscFree(svalues);CHKERRQ(ierr); 5552 5553 /* allocate buffers for sending j and a arrays */ 5554 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5555 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5556 5557 /* create i-array of B_oth */ 5558 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5559 5560 b_othi[0] = 0; 5561 len = 0; /* total length of j or a array to be received */ 5562 k = 0; 5563 for (i=0; i<nrecvs; i++) { 5564 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5565 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5566 for (j=0; j<nrows; j++) { 5567 b_othi[k+1] = b_othi[k] + rowlen[j]; 5568 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5569 k++; 5570 } 5571 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5572 } 5573 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5574 5575 /* allocate space for j and a arrrays of B_oth */ 5576 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5577 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5578 5579 /* j-array */ 5580 /*---------*/ 5581 /* post receives of j-array */ 5582 for (i=0; i<nrecvs; i++) { 5583 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5584 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5585 } 5586 5587 /* pack the outgoing message j-array */ 5588 if (nsends) k = sstarts[0]; 5589 for (i=0; i<nsends; i++) { 5590 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5591 bufJ = bufj+sstartsj[i]; 5592 for (j=0; j<nrows; j++) { 5593 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5594 for (ll=0; ll<sbs; ll++) { 5595 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5596 for (l=0; l<ncols; l++) { 5597 *bufJ++ = cols[l]; 5598 } 5599 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5600 } 
5601 } 5602 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5603 } 5604 5605 /* recvs and sends of j-array are completed */ 5606 i = nrecvs; 5607 while (i--) { 5608 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5609 } 5610 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5611 } else if (scall == MAT_REUSE_MATRIX) { 5612 sstartsj = *startsj_s; 5613 rstartsj = *startsj_r; 5614 bufa = *bufa_ptr; 5615 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5616 b_otha = b_oth->a; 5617 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5618 5619 /* a-array */ 5620 /*---------*/ 5621 /* post receives of a-array */ 5622 for (i=0; i<nrecvs; i++) { 5623 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5624 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5625 } 5626 5627 /* pack the outgoing message a-array */ 5628 if (nsends) k = sstarts[0]; 5629 for (i=0; i<nsends; i++) { 5630 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5631 bufA = bufa+sstartsj[i]; 5632 for (j=0; j<nrows; j++) { 5633 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5634 for (ll=0; ll<sbs; ll++) { 5635 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5636 for (l=0; l<ncols; l++) { 5637 *bufA++ = vals[l]; 5638 } 5639 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5640 } 5641 } 5642 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5643 } 5644 /* recvs and sends of a-array are completed */ 5645 i = nrecvs; 5646 while (i--) { 5647 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5648 } 5649 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5650 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5651 5652 if (scall == MAT_INITIAL_MATRIX) { 5653 /* put together the new matrix */ 5654 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5655 5656 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5657 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5658 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5659 b_oth->free_a = PETSC_TRUE; 5660 b_oth->free_ij = PETSC_TRUE; 5661 b_oth->nonew = 0; 5662 5663 ierr = PetscFree(bufj);CHKERRQ(ierr); 5664 if (!startsj_s || !bufa_ptr) { 5665 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5666 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5667 } else { 5668 *startsj_s = sstartsj; 5669 *startsj_r = rstartsj; 5670 *bufa_ptr = bufa; 5671 } 5672 } 5673 5674 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5675 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5676 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5677 PetscFunctionReturn(0); 5678 } 5679 5680 /*@C 5681 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5682 5683 Not Collective 5684 5685 Input Parameters: 5686 . A - The matrix in mpiaij format 5687 5688 Output Parameter: 5689 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5690 . 
colmap - A map from global column index to local index into lvec 5691 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5692 5693 Level: developer 5694 5695 @*/ 5696 #if defined(PETSC_USE_CTABLE) 5697 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5698 #else 5699 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5700 #endif 5701 { 5702 Mat_MPIAIJ *a; 5703 5704 PetscFunctionBegin; 5705 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5706 PetscValidPointer(lvec, 2); 5707 PetscValidPointer(colmap, 3); 5708 PetscValidPointer(multScatter, 4); 5709 a = (Mat_MPIAIJ*) A->data; 5710 if (lvec) *lvec = a->lvec; 5711 if (colmap) *colmap = a->colmap; 5712 if (multScatter) *multScatter = a->Mvctx; 5713 PetscFunctionReturn(0); 5714 } 5715 5716 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5717 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5718 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5719 #if defined(PETSC_HAVE_MKL_SPARSE) 5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5721 #endif 5722 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5724 #if defined(PETSC_HAVE_ELEMENTAL) 5725 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5726 #endif 5727 #if defined(PETSC_HAVE_HYPRE) 5728 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5729 #endif 5730 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5731 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5732 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5733 5734 /* 5735 Computes (B'*A')' since computing B*A directly is untenable 5736 5737 n p p 5738 ( ) ( ) ( ) 5739 m ( A ) * n ( B ) = m ( C ) 5740 ( ) ( ) ( ) 5741 5742 */ 5743 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5744 { 5745 PetscErrorCode ierr; 5746 Mat At,Bt,Ct; 5747 5748 PetscFunctionBegin; 5749 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5750 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5751 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5752 ierr = MatDestroy(&At);CHKERRQ(ierr); 5753 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5754 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5755 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5756 PetscFunctionReturn(0); 5757 } 5758 5759 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5760 { 5761 PetscErrorCode ierr; 5762 PetscBool cisdense; 5763 5764 PetscFunctionBegin; 5765 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5766 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5767 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5768 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5769 if (!cisdense) { 5770 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5771 } 5772 ierr = MatSetUp(C);CHKERRQ(ierr); 5773 5774 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5775 

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
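
/*
   Minimal sketches of the two usage notes in the MATMPIAIJ manual page above (the variable names,
   sizes, and indices are illustrative only):

     // (1) Passing NULL for the values inserts explicit zeros at the given locations
     ierr = MatSetValues(A,1,&row,ncols,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);

     // (2) Recording only the nonzero structure; set the option before inserting entries, after
     //     which the numerical values passed to MatSetValues() are ignored
     ierr = MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);CHKERRQ(ierr);
*/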

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" parts of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - number of local columns; this should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
       For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix; that is, i[0] = 0 and i[row] = i[row-1] + number of entries in row row-1 of that portion
.  j - column indices for the "diagonal" portion, local to this process's diagonal block (0 to n-1)
.  a - matrix values for the "diagonal" portion
.  oi - row indices for the "off-diagonal" portion of the matrix; that is, oi[0] = 0 and oi[row] = oi[row-1] + number of entries in row row-1 of that portion
.  oj - column indices for the "off-diagonal" portion, given as global column indices
-  oa - matrix values for the "off-diagonal" portion

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, a, oi, oj, and oa arrays are NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i, j, oi, and oj indices are zero-based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

       A small, commented usage sketch of the split-array format appears after the Fortran interface at the end of this file.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A     = aij->A;
    Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa   = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B     = aij->B;
    Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba   = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro.
     */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed from the new B before rp2/ap2 are rebuilt */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
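
/*
   Illustrative usage sketch for MatCreateMPIAIJWithSplitArrays(), kept inside a comment so it is
   not part of the library build. It assumes exactly two MPI ranks and assembles the 4x4 matrix

        [ 2  0 -1  0 ]
        [ 0  2  0 -1 ]
        [-1  0  2  0 ]
        [ 0 -1  0  2 ]

   with each rank owning two rows and two columns. The "diagonal" blocks use local column indices,
   the "off-diagonal" blocks use global column indices, and none of the arrays are copied, so they
   must stay valid until the matrix is destroyed.

   #include <petscmat.h>

   int main(int argc,char **argv)
   {
     Mat            A;
     PetscMPIInt    rank,size;
     PetscErrorCode ierr;
     PetscInt       i[]  = {0,1,2};     // diagonal block: one entry per row
     PetscInt       oi[] = {0,1,2};     // off-diagonal block: one entry per row
     PetscInt       j[]  = {0,1};       // local column indices (0..n-1)
     PetscInt       oj[2];              // global column indices, filled in per rank below
     PetscScalar    a[]  = {2.0,2.0};
     PetscScalar    oa[] = {-1.0,-1.0};

     ierr = PetscInitialize(&argc,&argv,NULL,NULL);if (ierr) return ierr;
     ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
     ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
     if (size != 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_WRONG,"This sketch assumes exactly two MPI ranks");

     oj[0] = rank ? 0 : 2;              // couple each local row to the other rank's columns
     oj[1] = rank ? 1 : 3;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr); // only now may the arrays above be freed or go out of scope
     ierr = PetscFinalize();
     return ierr;
   }
*/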