#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
    switches over to use inodes when enough of them exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j <nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
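    /* The incoming stream of values for each local row i is ordered as: the ld[i] off-diagonal
       entries whose global column lies before this process's diagonal block, then the diagonal-block
       entries of that row, then the remaining off-diagonal entries; the copies below split the
       stream into the off-diagonal (Ao) and diagonal (Ad) value arrays accordingly. */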
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 583 if (im[i] >= rstart && im[i] < rend) { 584 row = im[i] - rstart; 585 lastcol1 = -1; 586 rp1 = aj + ai[row]; 587 ap1 = aa + ai[row]; 588 rmax1 = aimax[row]; 589 nrow1 = ailen[row]; 590 low1 = 0; 591 high1 = nrow1; 592 lastcol2 = -1; 593 rp2 = bj + bi[row]; 594 ap2 = ba + bi[row]; 595 rmax2 = bimax[row]; 596 nrow2 = bilen[row]; 597 low2 = 0; 598 high2 = nrow2; 599 600 for (j=0; j<n; j++) { 601 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 602 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 603 if (in[j] >= cstart && in[j] < cend) { 604 col = in[j] - cstart; 605 nonew = a->nonew; 606 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 607 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 608 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 609 #endif 610 } else if (in[j] < 0) continue; 611 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 612 else { 613 if (mat->was_assembled) { 614 if (!aij->colmap) { 615 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 616 } 617 #if defined(PETSC_USE_CTABLE) 618 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 619 col--; 620 #else 621 col = aij->colmap[in[j]] - 1; 622 #endif 623 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 624 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 625 col = in[j]; 626 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 627 B = aij->B; 628 b = (Mat_SeqAIJ*)B->data; 629 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 630 rp2 = bj + bi[row]; 631 ap2 = ba + bi[row]; 632 rmax2 = bimax[row]; 633 nrow2 = bilen[row]; 634 low2 = 0; 635 high2 = nrow2; 636 bm = aij->B->rmap->n; 637 ba = b->a; 638 inserted = PETSC_FALSE; 639 } else if (col < 0) { 640 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 641 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 642 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 643 } 644 } else col = in[j]; 645 nonew = b->nonew; 646 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 647 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 648 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 649 #endif 650 } 651 } 652 } else { 653 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 654 if (!aij->donotstash) { 655 mat->assembled = PETSC_FALSE; 656 if (roworiented) { 657 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat            A    = aij->A; /* diagonal part of the matrix */
  Mat            B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b   = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       diag_so_far = 0,dnz;
  PetscInt       offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /*  Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
  Mat            A     = aij->A; /* diagonal part of the matrix */
  Mat            B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ     *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ     *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ     *b    = (Mat_SeqAIJ*)B->data;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt       *ailen = a->ilen,*aj = a->j;
  PetscInt       *bilen = b->ilen,*bj = b->j;
  PetscInt       am     = aij->A->rmap->n,j;
  PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point.
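                                                               They are the preallocated row starts (the i arrays of the diagonal and off-diagonal blocks) and are used below as offsets into aj/aa and bj/ba.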
*/ 727 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 728 PetscScalar *aa = a->a,*ba = b->a; 729 730 PetscFunctionBegin; 731 /* Iterate over all rows of the matrix */ 732 for (j=0; j<am; j++) { 733 dnz_row = onz_row = 0; 734 rowstart_offd = full_offd_i[j]; 735 rowstart_diag = full_diag_i[j]; 736 /* Iterate over all non-zero columns of the current row */ 737 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 738 /* If column is in the diagonal */ 739 if (mat_j[col] >= cstart && mat_j[col] < cend) { 740 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 741 aa[rowstart_diag+dnz_row] = mat_a[col]; 742 dnz_row++; 743 } else { /* off-diagonal entries */ 744 bj[rowstart_offd+onz_row] = mat_j[col]; 745 ba[rowstart_offd+onz_row] = mat_a[col]; 746 onz_row++; 747 } 748 } 749 ailen[j] = dnz_row; 750 bilen[j] = onz_row; 751 } 752 PetscFunctionReturn(0); 753 } 754 755 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 756 { 757 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 758 PetscErrorCode ierr; 759 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 760 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 761 762 PetscFunctionBegin; 763 for (i=0; i<m; i++) { 764 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 765 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 766 if (idxm[i] >= rstart && idxm[i] < rend) { 767 row = idxm[i] - rstart; 768 for (j=0; j<n; j++) { 769 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 770 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 771 if (idxn[j] >= cstart && idxn[j] < cend) { 772 col = idxn[j] - cstart; 773 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 774 } else { 775 if (!aij->colmap) { 776 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 777 } 778 #if defined(PETSC_USE_CTABLE) 779 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 780 col--; 781 #else 782 col = aij->colmap[idxn[j]] - 1; 783 #endif 784 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 785 else { 786 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 787 } 788 } 789 } 790 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 791 } 792 PetscFunctionReturn(0); 793 } 794 795 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 796 797 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 798 { 799 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 800 PetscErrorCode ierr; 801 PetscInt nstash,reallocs; 802 803 PetscFunctionBegin; 804 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 805 806 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 807 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 808 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 809 PetscFunctionReturn(0); 810 } 811 812 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 813 { 814 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 815 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 816 PetscErrorCode ierr; 817 PetscMPIInt n; 818 PetscInt i,j,rstart,ncols,flg; 819 PetscInt *row,*col; 820 
PetscBool other_disassembled; 821 PetscScalar *val; 822 823 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 824 825 PetscFunctionBegin; 826 if (!aij->donotstash && !mat->nooffprocentries) { 827 while (1) { 828 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 829 if (!flg) break; 830 831 for (i=0; i<n; ) { 832 /* Now identify the consecutive vals belonging to the same row */ 833 for (j=i,rstart=row[j]; j<n; j++) { 834 if (row[j] != rstart) break; 835 } 836 if (j < n) ncols = j-i; 837 else ncols = n-i; 838 /* Now assemble all these values with a single function call */ 839 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 840 i = j; 841 } 842 } 843 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 844 } 845 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 846 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 847 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 848 if (mat->boundtocpu) { 849 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 850 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 851 } 852 #endif 853 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 854 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 855 856 /* determine if any processor has disassembled, if so we must 857 also disassemble ourself, in order that we may reassemble. */ 858 /* 859 if nonzero structure of submatrix B cannot change then we know that 860 no processor disassembled thus we can skip this stuff 861 */ 862 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 863 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 864 if (mat->was_assembled && !other_disassembled) { 865 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 866 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 867 #endif 868 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 869 } 870 } 871 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 872 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 875 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 876 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 877 #endif 878 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 879 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 880 881 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 882 883 aij->rowvalues = 0; 884 885 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 886 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 887 888 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 889 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 890 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 891 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 892 } 893 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 894 mat->offloadmask = PETSC_OFFLOAD_BOTH; 895 #endif 896 PetscFunctionReturn(0); 897 } 898 899 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 900 { 901 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 906 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 907 PetscFunctionReturn(0); 908 } 909 910 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 911 { 912 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 913 PetscObjectState sA, sB; 914 PetscInt *lrows; 915 PetscInt r, len; 916 PetscBool cong, lch, gch; 917 PetscErrorCode ierr; 918 919 PetscFunctionBegin; 920 /* get locally owned rows */ 921 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 922 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 923 /* fix right hand side if needed */ 924 if (x && b) { 925 const PetscScalar *xx; 926 PetscScalar *bb; 927 928 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 929 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 930 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 931 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 932 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 933 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 934 } 935 936 sA = mat->A->nonzerostate; 937 sB = mat->B->nonzerostate; 938 939 if (diag != 0.0 && cong) { 940 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 941 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 942 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 943 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 944 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 945 PetscInt nnwA, nnwB; 946 PetscBool nnzA, nnzB; 947 948 nnwA = aijA->nonew; 949 nnwB = aijB->nonew; 950 nnzA = aijA->keepnonzeropattern; 951 nnzB = aijB->keepnonzeropattern; 952 if (!nnzA) { 953 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 954 aijA->nonew = 0; 955 } 956 if (!nnzB) { 957 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 958 aijB->nonew = 0; 959 } 960 /* Must zero here before the next loop */ 961 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 962 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 963 for (r = 0; r < len; ++r) { 964 const PetscInt row = lrows[r] + A->rmap->rstart; 965 if (row >= A->cmap->N) continue; 966 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 967 } 968 aijA->nonew = nnwA; 969 aijB->nonew = nnwB; 970 } else { 971 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 972 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 973 } 974 ierr = PetscFree(lrows);CHKERRQ(ierr); 975 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 976 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 977 978 /* reduce nonzerostate */ 979 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 980 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 981 if (gch) A->nonzerostate++; 982 PetscFunctionReturn(0); 983 } 984 985 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 986 { 987 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 988 
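/* Overview of the algorithm below: a PetscSF maps the requested (possibly off-process) rows to their owning ranks,
   a reduction flags the locally owned rows, MatZeroRowsColumns() handles the diagonal block l->A, and a 0/1 mask of
   the zeroed rows is scattered through l->Mvctx so the matching columns can be zeroed (and, when x and b are given,
   the right-hand side fixed up) in the off-diagonal block l->B. */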
PetscErrorCode ierr; 989 PetscMPIInt n = A->rmap->n; 990 PetscInt i,j,r,m,len = 0; 991 PetscInt *lrows,*owners = A->rmap->range; 992 PetscMPIInt p = 0; 993 PetscSFNode *rrows; 994 PetscSF sf; 995 const PetscScalar *xx; 996 PetscScalar *bb,*mask; 997 Vec xmask,lmask; 998 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 999 const PetscInt *aj, *ii,*ridx; 1000 PetscScalar *aa; 1001 1002 PetscFunctionBegin; 1003 /* Create SF where leaves are input rows and roots are owned rows */ 1004 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1005 for (r = 0; r < n; ++r) lrows[r] = -1; 1006 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1007 for (r = 0; r < N; ++r) { 1008 const PetscInt idx = rows[r]; 1009 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1010 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1011 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1012 } 1013 rrows[r].rank = p; 1014 rrows[r].index = rows[r] - owners[p]; 1015 } 1016 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1017 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1018 /* Collect flags for rows to be zeroed */ 1019 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1020 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1021 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1022 /* Compress and put in row numbers */ 1023 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1024 /* zero diagonal part of matrix */ 1025 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1026 /* handle off diagonal part of matrix */ 1027 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1028 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1029 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1030 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1031 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1032 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1033 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1034 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1035 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1036 PetscBool cong; 1037 1038 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1039 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1040 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1043 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1044 } 1045 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1046 /* remove zeroed rows of off diagonal matrix */ 1047 ii = aij->i; 1048 for (i=0; i<len; i++) { 1049 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1050 } 1051 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1052 if (aij->compressedrow.use) { 1053 m = aij->compressedrow.nrows; 1054 ii = aij->compressedrow.i; 1055 ridx = aij->compressedrow.rindex; 1056 for (i=0; i<m; i++) { 1057 n = ii[i+1] - ii[i]; 1058 aj = aij->j + ii[i]; 1059 aa = aij->a + ii[i]; 1060 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[*ridx] -= 
*aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 ridx++; 1070 } 1071 } else { /* do not use compressed row format */ 1072 m = l->B->rmap->n; 1073 for (i=0; i<m; i++) { 1074 n = ii[i+1] - ii[i]; 1075 aj = aij->j + ii[i]; 1076 aa = aij->a + ii[i]; 1077 for (j=0; j<n; j++) { 1078 if (PetscAbsScalar(mask[*aj])) { 1079 if (b) bb[i] -= *aa*xx[*aj]; 1080 *aa = 0.0; 1081 } 1082 aa++; 1083 aj++; 1084 } 1085 } 1086 } 1087 if (x && b) { 1088 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1089 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1090 } 1091 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1092 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1093 ierr = PetscFree(lrows);CHKERRQ(ierr); 1094 1095 /* only change matrix nonzero state if pattern was allowed to be changed */ 1096 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1097 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1098 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1099 } 1100 PetscFunctionReturn(0); 1101 } 1102 1103 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1104 { 1105 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1106 PetscErrorCode ierr; 1107 PetscInt nt; 1108 VecScatter Mvctx = a->Mvctx; 1109 1110 PetscFunctionBegin; 1111 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1112 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1113 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1114 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1115 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1116 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1117 PetscFunctionReturn(0); 1118 } 1119 1120 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1121 { 1122 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1123 PetscErrorCode ierr; 1124 1125 PetscFunctionBegin; 1126 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 VecScatter Mvctx = a->Mvctx; 1135 1136 PetscFunctionBegin; 1137 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1138 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1139 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1140 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1141 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1148 PetscErrorCode ierr; 1149 1150 PetscFunctionBegin; 1151 /* do nondiagonal part */ 1152 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1153 /* do local part */ 1154 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1155 /* add partial results together */ 1156 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1157 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1162 { 1163 MPI_Comm comm; 1164 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1165 Mat Adia = Aij->A, 
Bdia, Aoff,Boff,*Aoffs,*Boffs; 1166 IS Me,Notme; 1167 PetscErrorCode ierr; 1168 PetscInt M,N,first,last,*notme,i; 1169 PetscBool lf; 1170 PetscMPIInt size; 1171 1172 PetscFunctionBegin; 1173 /* Easy test: symmetric diagonal block */ 1174 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1175 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1176 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1177 if (!*f) PetscFunctionReturn(0); 1178 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1179 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1180 if (size == 1) PetscFunctionReturn(0); 1181 1182 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1183 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1184 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1185 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1186 for (i=0; i<first; i++) notme[i] = i; 1187 for (i=last; i<M; i++) notme[i-last+first] = i; 1188 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1189 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1190 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1191 Aoff = Aoffs[0]; 1192 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1193 Boff = Boffs[0]; 1194 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1195 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1196 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1197 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1198 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1199 ierr = PetscFree(notme);CHKERRQ(ierr); 1200 PetscFunctionReturn(0); 1201 } 1202 1203 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1209 PetscFunctionReturn(0); 1210 } 1211 1212 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1213 { 1214 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 /* do nondiagonal part */ 1219 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1220 /* do local part */ 1221 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1222 /* add partial results together */ 1223 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1224 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1225 PetscFunctionReturn(0); 1226 } 1227 1228 /* 1229 This only works correctly for square matrices where the subblock A->A is the 1230 diagonal block 1231 */ 1232 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1233 { 1234 PetscErrorCode ierr; 1235 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1236 1237 PetscFunctionBegin; 1238 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1239 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1240 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1241 PetscFunctionReturn(0); 1242 } 1243 1244 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1245 { 1246 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1247 PetscErrorCode ierr; 1248 1249 PetscFunctionBegin; 1250 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1251 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1252 PetscFunctionReturn(0); 1253 } 1254 1255 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1256 { 1257 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 #if defined(PETSC_USE_LOG) 1262 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1263 #endif 1264 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1265 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1266 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1267 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1268 #if defined(PETSC_USE_CTABLE) 1269 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1270 #else 1271 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1272 #endif 1273 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1274 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1275 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1276 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1277 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1278 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1279 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1280 1281 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1282 ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr); 1283 1284 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1286 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1294 #if defined(PETSC_HAVE_ELEMENTAL) 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1296 #endif 1297 #if defined(PETSC_HAVE_SCALAPACK) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 
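/* File layout written below: a 4-entry header {MAT_FILE_CLASSID, M, N, global nz}, then the per-row nonzero
   counts, then all global column indices, then all values; within each row the off-diagonal entries that precede
   the diagonal block are written first, then the diagonal block, then the remaining off-diagonal entries, so the
   columns of every row appear in increasing global order. */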
1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs = mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire 
matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = 0; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = 
irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
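       (The scan below takes, over all local rows, the maximum of the number of
       nonzeros in the diagonal block plus the number of nonzeros in the
       off-diagonal block; mat->rowvalues and mat->rowindices are allocated once
       at that size and then reused by every subsequent MatGetRow() call.)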
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = 0; pvB = 0;} 1874 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = 0; 1912 if (v) *v = 0; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 
0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
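         The VecScatterBegin() below only starts moving the right-scaling vector rr
         into the ghosted work vector aij->lvec; the matching VecScatterEnd() is
         delayed until after the left scaling of the off-diagonal block and the
         scaling of the diagonal block, so that local work proceeds while the
         scatter is in flight.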
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
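    The merge below walks the two sorted column lists of each row i and counts the
    size of their union in terms of global column indices (xltog and yltog map the
    local column numbers of X and Y to global ones).  For example, if row i of X
    has global columns {0,3,7} and row i of Y has global columns {3,5}, the union
    is {0,3,5,7} and nnz[i] = 4.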
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2253 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2254 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2256 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2257 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2258 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2259 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
2261 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2262 } 2263 PetscFunctionReturn(0); 2264 } 2265 2266 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2267 2268 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2269 { 2270 #if defined(PETSC_USE_COMPLEX) 2271 PetscErrorCode ierr; 2272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2273 2274 PetscFunctionBegin; 2275 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2276 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2277 #else 2278 PetscFunctionBegin; 2279 #endif 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2301 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2306 { 2307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 PetscInt i,*idxb = 0; 2310 PetscScalar *va,*vb; 2311 Vec vtmp; 2312 2313 PetscFunctionBegin; 2314 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2315 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2316 if (idx) { 2317 for (i=0; i<A->rmap->n; i++) { 2318 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2319 } 2320 } 2321 2322 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2323 if (idx) { 2324 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2325 } 2326 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2327 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2328 2329 for (i=0; i<A->rmap->n; i++) { 2330 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2331 va[i] = vb[i]; 2332 if (idx) idx[i] = a->garray[idxb[i]]; 2333 } 2334 } 2335 2336 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2337 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2338 ierr = PetscFree(idxb);CHKERRQ(ierr); 2339 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2346 PetscErrorCode ierr; 2347 PetscInt i,*idxb = 0; 2348 PetscScalar *va,*vb; 2349 Vec vtmp; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 if (idx) { 2355 for (i=0; i<A->cmap->n; i++) { 2356 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2357 } 2358 } 2359 2360 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2361 if (idx) { 2362 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2363 } 2364 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2365 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2366 2367 for (i=0; i<A->rmap->n; i++) { 2368 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2369 va[i] = vb[i]; 2370 if (idx) idx[i] = a->garray[idxb[i]]; 2371 } 2372 } 2373 2374 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2376 ierr = PetscFree(idxb);CHKERRQ(ierr); 2377 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2382 { 2383 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat   = (Mat_MPIAIJ*) A->data;
  PetscInt       n      = A->rmap->n;
  PetscInt       cstart = A->cmap->rstart;
  PetscInt       *cmap  = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr    = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2478 A->factorerrortype = a->A->factorerrortype; 2479 PetscFunctionReturn(0); 2480 } 2481 2482 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2483 { 2484 PetscErrorCode ierr; 2485 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2486 2487 PetscFunctionBegin; 2488 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2489 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2490 if (x->assembled) { 2491 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2492 } else { 2493 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2494 } 2495 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2501 { 2502 PetscFunctionBegin; 2503 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2504 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2505 PetscFunctionReturn(0); 2506 } 2507 2508 /*@ 2509 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2510 2511 Collective on Mat 2512 2513 Input Parameters: 2514 + A - the matrix 2515 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2516 2517 Level: advanced 2518 2519 @*/ 2520 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2521 { 2522 PetscErrorCode ierr; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2530 { 2531 PetscErrorCode ierr; 2532 PetscBool sc = PETSC_FALSE,flg; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2536 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2537 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2538 if (flg) { 2539 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2540 } 2541 ierr = PetscOptionsTail();CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2546 { 2547 PetscErrorCode ierr; 2548 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2549 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2550 2551 PetscFunctionBegin; 2552 if (!Y->preallocated) { 2553 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2554 } else if (!aij->nz) { 2555 PetscInt nonew = aij->nonew; 2556 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2557 aij->nonew = nonew; 2558 } 2559 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2570 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2571 if (d) { 2572 PetscInt rstart; 
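    /* the sequential MatMissingDiagonal() above reports a row index local to the diagonal block;
       shift it by this process's first global row so that *d is a global row number */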
2573 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2574 *d += rstart; 2575 2576 } 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2581 { 2582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 /* -------------------------------------------------------------------*/ 2591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2592 MatGetRow_MPIAIJ, 2593 MatRestoreRow_MPIAIJ, 2594 MatMult_MPIAIJ, 2595 /* 4*/ MatMultAdd_MPIAIJ, 2596 MatMultTranspose_MPIAIJ, 2597 MatMultTransposeAdd_MPIAIJ, 2598 0, 2599 0, 2600 0, 2601 /*10*/ 0, 2602 0, 2603 0, 2604 MatSOR_MPIAIJ, 2605 MatTranspose_MPIAIJ, 2606 /*15*/ MatGetInfo_MPIAIJ, 2607 MatEqual_MPIAIJ, 2608 MatGetDiagonal_MPIAIJ, 2609 MatDiagonalScale_MPIAIJ, 2610 MatNorm_MPIAIJ, 2611 /*20*/ MatAssemblyBegin_MPIAIJ, 2612 MatAssemblyEnd_MPIAIJ, 2613 MatSetOption_MPIAIJ, 2614 MatZeroEntries_MPIAIJ, 2615 /*24*/ MatZeroRows_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*29*/ MatSetUp_MPIAIJ, 2621 0, 2622 0, 2623 MatGetDiagonalBlock_MPIAIJ, 2624 0, 2625 /*34*/ MatDuplicate_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*39*/ MatAXPY_MPIAIJ, 2631 MatCreateSubMatrices_MPIAIJ, 2632 MatIncreaseOverlap_MPIAIJ, 2633 MatGetValues_MPIAIJ, 2634 MatCopy_MPIAIJ, 2635 /*44*/ MatGetRowMax_MPIAIJ, 2636 MatScale_MPIAIJ, 2637 MatShift_MPIAIJ, 2638 MatDiagonalSet_MPIAIJ, 2639 MatZeroRowsColumns_MPIAIJ, 2640 /*49*/ MatSetRandom_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*54*/ MatFDColoringCreate_MPIXAIJ, 2646 0, 2647 MatSetUnfactored_MPIAIJ, 2648 MatPermute_MPIAIJ, 2649 0, 2650 /*59*/ MatCreateSubMatrix_MPIAIJ, 2651 MatDestroy_MPIAIJ, 2652 MatView_MPIAIJ, 2653 0, 2654 0, 2655 /*64*/ 0, 2656 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2661 MatGetRowMinAbs_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /*75*/ MatFDColoringApply_AIJ, 2667 MatSetFromOptions_MPIAIJ, 2668 0, 2669 0, 2670 MatFindZeroDiagonals_MPIAIJ, 2671 /*80*/ 0, 2672 0, 2673 0, 2674 /*83*/ MatLoad_MPIAIJ, 2675 MatIsSymmetric_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*89*/ 0, 2681 0, 2682 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2686 0, 2687 0, 2688 0, 2689 MatBindToCPU_MPIAIJ, 2690 /*99*/ MatProductSetFromOptions_MPIAIJ, 2691 0, 2692 0, 2693 MatConjugate_MPIAIJ, 2694 0, 2695 /*104*/MatSetValuesRow_MPIAIJ, 2696 MatRealPart_MPIAIJ, 2697 MatImaginaryPart_MPIAIJ, 2698 0, 2699 0, 2700 /*109*/0, 2701 0, 2702 MatGetRowMin_MPIAIJ, 2703 0, 2704 MatMissingDiagonal_MPIAIJ, 2705 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2706 0, 2707 MatGetGhosts_MPIAIJ, 2708 0, 2709 0, 2710 /*119*/0, 2711 0, 2712 0, 2713 0, 2714 MatGetMultiProcBlock_MPIAIJ, 2715 /*124*/MatFindNonzeroRows_MPIAIJ, 2716 MatGetColumnNorms_MPIAIJ, 2717 MatInvertBlockDiagonal_MPIAIJ, 2718 MatInvertVariableBlockDiagonal_MPIAIJ, 2719 MatCreateSubMatricesMPI_MPIAIJ, 2720 /*129*/0, 2721 0, 2722 0, 2723 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2724 0, 2725 /*134*/0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /*139*/MatSetBlockSizes_MPIAIJ, 2731 0, 2732 0, 2733 MatFDColoringSetUp_MPIXAIJ, 2734 MatFindOffBlockDiagonalEntries_MPIAIJ, 2735 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2736 /*145*/0, 2737 0, 2738 0 2739 }; 2740 2741 /* 
----------------------------------------------------------------------------------------*/ 2742 2743 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2744 { 2745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2750 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2755 { 2756 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2761 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2766 { 2767 Mat_MPIAIJ *b; 2768 PetscErrorCode ierr; 2769 PetscMPIInt size; 2770 2771 PetscFunctionBegin; 2772 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2774 b = (Mat_MPIAIJ*)B->data; 2775 2776 #if defined(PETSC_USE_CTABLE) 2777 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2778 #else 2779 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2780 #endif 2781 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2782 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2783 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2784 2785 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2786 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2787 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2788 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2789 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2790 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2791 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2793 2794 if (!B->preallocated) { 2795 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2796 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2797 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2798 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2800 } 2801 2802 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2803 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2804 B->preallocated = PETSC_TRUE; 2805 B->was_assembled = PETSC_FALSE; 2806 B->assembled = PETSC_FALSE; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2811 { 2812 Mat_MPIAIJ *b; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2817 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2818 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2819 b = (Mat_MPIAIJ*)B->data; 2820 2821 #if defined(PETSC_USE_CTABLE) 2822 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2823 #else 2824 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2825 #endif 2826 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2827 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2828 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2829 2830 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2831 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2832 B->preallocated = PETSC_TRUE; 2833 B->was_assembled = PETSC_FALSE; 2834 B->assembled = PETSC_FALSE; 2835 PetscFunctionReturn(0); 2836 } 2837 2838 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2839 { 2840 Mat mat; 2841 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2842 PetscErrorCode ierr; 2843 2844 PetscFunctionBegin; 2845 *newmat = 0; 2846 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2847 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2848 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2849 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2850 a = (Mat_MPIAIJ*)mat->data; 2851 2852 mat->factortype = matin->factortype; 2853 mat->assembled = matin->assembled; 2854 mat->insertmode = NOT_SET_VALUES; 2855 mat->preallocated = matin->preallocated; 2856 2857 a->size = oldmat->size; 2858 a->rank = oldmat->rank; 2859 a->donotstash = oldmat->donotstash; 2860 a->roworiented = oldmat->roworiented; 2861 a->rowindices = NULL; 2862 a->rowvalues = NULL; 2863 a->getrowactive = PETSC_FALSE; 2864 2865 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2866 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2867 2868 if (oldmat->colmap) { 2869 #if defined(PETSC_USE_CTABLE) 2870 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2871 #else 2872 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2873 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2874 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2875 #endif 2876 } else a->colmap = NULL; 2877 if (oldmat->garray) { 2878 PetscInt len; 2879 len = oldmat->B->cmap->n; 2880 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2881 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2882 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2883 } else a->garray = NULL; 2884 2885 /* It may happen MatDuplicate is called with a non-assembled matrix 2886 In fact, MatDuplicate only requires the matrix to be preallocated 2887 This may happen inside a DMCreateMatrix_Shell */ 2888 if (oldmat->lvec) { 2889 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2891 } 2892 if (oldmat->Mvctx) { 2893 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2895 } 2896 if (oldmat->Mvctx_mpi1) { 2897 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2898 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2899 } 2900 2901 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2902 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2903 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2904 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2905 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2906 *newmat = mat; 2907 PetscFunctionReturn(0); 2908 } 2909 2910 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2911 { 2912 PetscBool isbinary, ishdf5; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2917 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2918 /* force binary viewer to load .info file if it has not yet done so */ 2919 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2920 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2921 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2922 if (isbinary) { 2923 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2924 } else if (ishdf5) { 2925 #if defined(PETSC_HAVE_HDF5) 2926 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2927 #else 2928 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2929 #endif 2930 } else { 2931 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2932 } 2933 PetscFunctionReturn(0); 2934 } 2935 2936 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2937 { 2938 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2939 PetscInt *rowidxs,*colidxs; 2940 PetscScalar *matvals; 2941 PetscErrorCode ierr; 2942 2943 PetscFunctionBegin; 2944 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2945 2946 /* read in matrix header */ 2947 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2948 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2949 M = header[1]; N = header[2]; nz = header[3]; 2950 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2951 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2952 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2953 2954 /* set block sizes from the viewer's .info file */ 2955 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2956 /* set global sizes if not set already */ 2957 if (mat->rmap->N < 0) mat->rmap->N = M; 2958 if (mat->cmap->N < 0) mat->cmap->N = N; 2959 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2961 2962 /* check if the matrix sizes are correct */ 2963 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2964 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2965 2966 /* read in row lengths and build row indices */ 2967 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2968 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2969 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2970 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2971 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2972 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2973 /* read in column indices and matrix values */ 2974 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2975 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2976 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2977 /* store matrix indices and values */ 2978 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2979 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2980 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2985 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2986 { 2987 PetscErrorCode ierr; 2988 IS iscol_local; 2989 PetscBool isstride; 2990 PetscMPIInt lisstride=0,gisstride; 2991 2992 PetscFunctionBegin; 2993 /* check if we are grabbing all columns*/ 2994 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2995 2996 if (isstride) { 2997 PetscInt start,len,mstart,mlen; 2998 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2999 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3000 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3001 if (mstart == start && mlen-mstart == len) lisstride = 1; 3002 } 3003 3004 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3005 if (gisstride) { 3006 PetscInt N; 3007 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3008 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3009 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3010 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3011 } else { 3012 PetscInt cbs; 3013 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3014 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3015 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3016 } 3017 3018 *isseq = iscol_local; 3019 PetscFunctionReturn(0); 3020 } 3021 3022 /* 3023 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3024 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3025 3026 Input Parameters: 3027 mat - matrix 3028 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3029 i.e., mat->rstart <= isrow[i] < mat->rend 3030 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3031 i.e., mat->cstart <= iscol[i] < mat->cend 3032 Output Parameter: 3033 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3034 iscol_o - sequential column index set for retrieving mat->B 3035 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3036 */ 3037 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3038 { 3039 PetscErrorCode ierr; 3040 Vec x,cmap; 3041 const PetscInt *is_idx; 3042 PetscScalar *xarray,*cmaparray; 3043 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3044 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3045 Mat B=a->B; 3046 Vec lvec=a->lvec,lcmap; 3047 PetscInt i,cstart,cend,Bn=B->cmap->N; 3048 MPI_Comm comm; 3049 VecScatter Mvctx=a->Mvctx; 3050 3051 PetscFunctionBegin; 3052 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3053 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3054 3055 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3056 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3057 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3058 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3059 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3060 3061 /* Get start indices */ 3062 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3063 isstart -= ncols; 3064 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3065 3066 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3067 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3068 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3069 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3070 for (i=0; i<ncols; i++) { 3071 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3072 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3073 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3074 } 3075 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3076 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3077 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3078 3079 /* Get iscol_d */ 3080 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3081 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3082 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3083 3084 /* Get isrow_d */ 3085 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3086 rstart = mat->rmap->rstart; 3087 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3088 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3089 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3090 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3091 3092 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3093 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3094 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3095 3096 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3097 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 3100 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3101 3102 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 3105 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3106 /* off-process column indices */ 3107 count = 0; 3108 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3109 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3110 3111 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3112 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3113 for (i=0; i<Bn; i++) { 3114 if (PetscRealPart(xarray[i]) > -1.0) { 3115 idx[count] = i; /* local column index in off-diagonal part B */ 3116 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3117 count++; 3118 } 3119 } 3120 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3121 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3122 3123 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3124 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3125 3126 ierr = PetscFree(idx);CHKERRQ(ierr); 3127 *garray = cmap1; 3128 3129 ierr = VecDestroy(&x);CHKERRQ(ierr); 3130 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3131 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 3135 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3136 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3137 { 3138 PetscErrorCode ierr; 3139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3140 Mat M = NULL; 3141 MPI_Comm comm; 3142 IS iscol_d,isrow_d,iscol_o; 3143 Mat Asub = NULL,Bsub = NULL; 3144 PetscInt n; 3145 3146 PetscFunctionBegin; 3147 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3148 3149 if (call == MAT_REUSE_MATRIX) { 3150 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3151 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3152 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3153 3154 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3155 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3156 3157 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3158 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3159 3160 /* Update diagonal and off-diagonal portions of submat */ 3161 asub = (Mat_MPIAIJ*)(*submat)->data; 3162 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3163 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3164 if (n) { 3165 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3166 } 3167 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3169 3170 } else { /* call == MAT_INITIAL_MATRIX) */ 3171 const PetscInt *garray; 3172 PetscInt BsubN; 3173 3174 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3175 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3176 3177 /* Create local submatrices Asub and Bsub */ 3178 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3179 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3180 3181 /* Create submatrix M */ 3182 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3183 3184 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3185 asub = (Mat_MPIAIJ*)M->data; 3186 3187 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3188 n = asub->B->cmap->N; 3189 if (BsubN > n) { 3190 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3191 const PetscInt *idx; 3192 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3193 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3194 3195 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3196 j = 0; 3197 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3198 for (i=0; i<n; i++) { 3199 if (j >= BsubN) break; 3200 while (subgarray[i] > garray[j]) j++; 3201 3202 if (subgarray[i] == garray[j]) { 3203 idx_new[i] = idx[j++]; 3204 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3205 } 3206 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3207 3208 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3210 3211 } else if (BsubN < n) { 3212 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3213 } 3214 3215 ierr = PetscFree(garray);CHKERRQ(ierr); 3216 *submat = M; 3217 3218 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3219 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3220 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3221 3222 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3223 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3224 3225 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3226 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3227 } 3228 PetscFunctionReturn(0); 3229 } 3230 3231 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3232 { 3233 PetscErrorCode ierr; 3234 IS iscol_local=NULL,isrow_d; 3235 PetscInt csize; 3236 PetscInt n,i,j,start,end; 3237 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3238 MPI_Comm comm; 3239 3240 PetscFunctionBegin; 3241 /* If isrow has same processor distribution as mat, 3242 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3243 if (call == MAT_REUSE_MATRIX) { 3244 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3245 if (isrow_d) { 3246 sameRowDist = PETSC_TRUE; 3247 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3248 } else { 3249 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3250 if (iscol_local) { 3251 sameRowDist = PETSC_TRUE; 3252 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3253 } 3254 } 3255 } else { 3256 /* Check if isrow has same processor distribution as mat */ 3257 sameDist[0] = 
PETSC_FALSE; 3258 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3259 if (!n) { 3260 sameDist[0] = PETSC_TRUE; 3261 } else { 3262 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3263 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3264 if (i >= start && j < end) { 3265 sameDist[0] = PETSC_TRUE; 3266 } 3267 } 3268 3269 /* Check if iscol has same processor distribution as mat */ 3270 sameDist[1] = PETSC_FALSE; 3271 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3272 if (!n) { 3273 sameDist[1] = PETSC_TRUE; 3274 } else { 3275 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3276 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3277 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3278 } 3279 3280 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3281 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3282 sameRowDist = tsameDist[0]; 3283 } 3284 3285 if (sameRowDist) { 3286 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3287 /* isrow and iscol have same processor distribution as mat */ 3288 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } else { /* sameRowDist */ 3291 /* isrow has same processor distribution as mat */ 3292 if (call == MAT_INITIAL_MATRIX) { 3293 PetscBool sorted; 3294 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3295 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3296 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3297 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3298 3299 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3300 if (sorted) { 3301 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3302 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3303 PetscFunctionReturn(0); 3304 } 3305 } else { /* call == MAT_REUSE_MATRIX */ 3306 IS iscol_sub; 3307 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3308 if (iscol_sub) { 3309 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 } 3313 } 3314 } 3315 3316 /* General case: iscol -> iscol_local which has global size of iscol */ 3317 if (call == MAT_REUSE_MATRIX) { 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3319 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3320 } else { 3321 if (!iscol_local) { 3322 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3323 } 3324 } 3325 3326 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3327 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3328 3329 if (call == MAT_INITIAL_MATRIX) { 3330 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3331 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3332 } 3333 PetscFunctionReturn(0); 3334 } 3335 3336 /*@C 3337 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3338 and "off-diagonal" part of the matrix in CSR format. 3339 3340 Collective 3341 3342 Input Parameters: 3343 + comm - MPI communicator 3344 . 
A - "diagonal" portion of matrix 3345 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3346 - garray - global index of B columns 3347 3348 Output Parameter: 3349 . mat - the matrix, with input A as its local diagonal matrix 3350 Level: advanced 3351 3352 Notes: 3353 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3354 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3355 3356 .seealso: MatCreateMPIAIJWithSplitArrays() 3357 @*/ 3358 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3359 { 3360 PetscErrorCode ierr; 3361 Mat_MPIAIJ *maij; 3362 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3363 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3364 PetscScalar *oa=b->a; 3365 Mat Bnew; 3366 PetscInt m,n,N; 3367 3368 PetscFunctionBegin; 3369 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3370 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3371 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3372 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3373 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3374 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3375 3376 /* Get global columns of mat */ 3377 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3378 3379 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3380 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3381 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3382 maij = (Mat_MPIAIJ*)(*mat)->data; 3383 3384 (*mat)->preallocated = PETSC_TRUE; 3385 3386 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3387 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3388 3389 /* Set A as diagonal portion of *mat */ 3390 maij->A = A; 3391 3392 nz = oi[m]; 3393 for (i=0; i<nz; i++) { 3394 col = oj[i]; 3395 oj[i] = garray[col]; 3396 } 3397 3398 /* Set Bnew as off-diagonal portion of *mat */ 3399 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3400 bnew = (Mat_SeqAIJ*)Bnew->data; 3401 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3402 maij->B = Bnew; 3403 3404 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3405 3406 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3407 b->free_a = PETSC_FALSE; 3408 b->free_ij = PETSC_FALSE; 3409 ierr = MatDestroy(&B);CHKERRQ(ierr); 3410 3411 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3412 bnew->free_a = PETSC_TRUE; 3413 bnew->free_ij = PETSC_TRUE; 3414 3415 /* condense columns of maij->B */ 3416 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3417 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3420 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3425 
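/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts mat[isrow,iscol] for the case where isrow has the
   same parallel row distribution as mat. iscol_local is the sequential gather of iscol (all requested
   columns, sorted, duplicates allowed); it is only needed for MAT_INITIAL_MATRIX and may be passed as
   NULL on reuse, in which case the "SubIScol" index set composed on *newmat is used instead.
*/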
3426 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3427 { 3428 PetscErrorCode ierr; 3429 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3430 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3431 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3432 Mat M,Msub,B=a->B; 3433 MatScalar *aa; 3434 Mat_SeqAIJ *aij; 3435 PetscInt *garray = a->garray,*colsub,Ncols; 3436 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3437 IS iscol_sub,iscmap; 3438 const PetscInt *is_idx,*cmap; 3439 PetscBool allcolumns=PETSC_FALSE; 3440 MPI_Comm comm; 3441 3442 PetscFunctionBegin; 3443 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3444 3445 if (call == MAT_REUSE_MATRIX) { 3446 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3447 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3448 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3449 3450 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3451 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3452 3453 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3454 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3455 3456 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3457 3458 } else { /* call == MAT_INITIAL_MATRIX) */ 3459 PetscBool flg; 3460 3461 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3462 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3463 3464 /* (1) iscol -> nonscalable iscol_local */ 3465 /* Check for special case: each processor gets entire matrix columns */ 3466 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3467 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3468 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3469 if (allcolumns) { 3470 iscol_sub = iscol_local; 3471 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3472 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3473 3474 } else { 3475 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3476 PetscInt *idx,*cmap1,k; 3477 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3478 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3479 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3480 count = 0; 3481 k = 0; 3482 for (i=0; i<Ncols; i++) { 3483 j = is_idx[i]; 3484 if (j >= cstart && j < cend) { 3485 /* diagonal part of mat */ 3486 idx[count] = j; 3487 cmap1[count++] = i; /* column index in submat */ 3488 } else if (Bn) { 3489 /* off-diagonal part of mat */ 3490 if (j == garray[k]) { 3491 idx[count] = j; 3492 cmap1[count++] = i; /* column index in submat */ 3493 } else if (j > garray[k]) { 3494 while (j > garray[k] && k < Bn-1) k++; 3495 if (j == garray[k]) { 3496 idx[count] = j; 3497 cmap1[count++] = i; /* column index in submat */ 3498 } 3499 } 3500 } 3501 } 3502 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3503 3504 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3505 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3506 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3507 3508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3509 } 3510 3511 /* (3) Create sequential Msub */ 3512 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3513 } 3514 3515 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3516 aij = (Mat_SeqAIJ*)(Msub)->data; 3517 ii = aij->i; 3518 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3519 3520 /* 3521 m - number of local rows 3522 Ncols - number of columns (same on all processors) 3523 rstart - first row in new global matrix generated 3524 */ 3525 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3526 3527 if (call == MAT_INITIAL_MATRIX) { 3528 /* (4) Create parallel newmat */ 3529 PetscMPIInt rank,size; 3530 PetscInt csize; 3531 3532 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3533 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3534 3535 /* 3536 Determine the number of non-zeros in the diagonal and off-diagonal 3537 portions of the matrix in order to do correct preallocation 3538 */ 3539 3540 /* first get start and end of "diagonal" columns */ 3541 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3542 if (csize == PETSC_DECIDE) { 3543 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3544 if (mglobal == Ncols) { /* square matrix */ 3545 nlocal = m; 3546 } else { 3547 nlocal = Ncols/size + ((Ncols % size) > rank); 3548 } 3549 } else { 3550 nlocal = csize; 3551 } 3552 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3553 rstart = rend - nlocal; 3554 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3555 3556 /* next, compute all the lengths */ 3557 jj = aij->j; 3558 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3559 olens = dlens + m; 3560 for (i=0; i<m; i++) { 3561 jend = ii[i+1] - ii[i]; 3562 olen = 0; 3563 dlen = 0; 3564 for (j=0; j<jend; j++) { 3565 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3566 else dlen++; 3567 jj++; 3568 } 3569 olens[i] = olen; 3570 dlens[i] = dlen; 3571 } 3572 3573 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3574 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3575 3576 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3577 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
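    /* set block sizes and type, then preallocate using the diagonal/off-diagonal row lengths (dlens/olens) computed above */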
3578 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3579 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3580 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3581 ierr = PetscFree(dlens);CHKERRQ(ierr); 3582 3583 } else { /* call == MAT_REUSE_MATRIX */ 3584 M = *newmat; 3585 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3586 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3587 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3588 /* 3589 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3590 rather than the slower MatSetValues(). 3591 */ 3592 M->was_assembled = PETSC_TRUE; 3593 M->assembled = PETSC_FALSE; 3594 } 3595 3596 /* (5) Set values of Msub to *newmat */ 3597 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3598 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3599 3600 jj = aij->j; 3601 aa = aij->a; 3602 for (i=0; i<m; i++) { 3603 row = rstart + i; 3604 nz = ii[i+1] - ii[i]; 3605 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3606 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3607 jj += nz; aa += nz; 3608 } 3609 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3610 3611 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3613 3614 ierr = PetscFree(colsub);CHKERRQ(ierr); 3615 3616 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3617 if (call == MAT_INITIAL_MATRIX) { 3618 *newmat = M; 3619 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3620 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3621 3622 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3623 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3624 3625 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3626 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3627 3628 if (iscol_local) { 3629 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3630 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3631 } 3632 } 3633 PetscFunctionReturn(0); 3634 } 3635 3636 /* 3637 Not great since it makes two copies of the submatrix, first an SeqAIJ 3638 in local and then by concatenating the local matrices the end result. 3639 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3640 3641 Note: This requires a sequential iscol with all indices. 
3642 */ 3643 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3644 { 3645 PetscErrorCode ierr; 3646 PetscMPIInt rank,size; 3647 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3648 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3649 Mat M,Mreuse; 3650 MatScalar *aa,*vwork; 3651 MPI_Comm comm; 3652 Mat_SeqAIJ *aij; 3653 PetscBool colflag,allcolumns=PETSC_FALSE; 3654 3655 PetscFunctionBegin; 3656 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3658 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3659 3660 /* Check for special case: each processor gets entire matrix columns */ 3661 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3662 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3663 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3664 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3665 3666 if (call == MAT_REUSE_MATRIX) { 3667 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3668 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3669 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3670 } else { 3671 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3672 } 3673 3674 /* 3675 m - number of local rows 3676 n - number of columns (same on all processors) 3677 rstart - first row in new global matrix generated 3678 */ 3679 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3680 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3681 if (call == MAT_INITIAL_MATRIX) { 3682 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3683 ii = aij->i; 3684 jj = aij->j; 3685 3686 /* 3687 Determine the number of non-zeros in the diagonal and off-diagonal 3688 portions of the matrix in order to do correct preallocation 3689 */ 3690 3691 /* first get start and end of "diagonal" columns */ 3692 if (csize == PETSC_DECIDE) { 3693 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3694 if (mglobal == n) { /* square matrix */ 3695 nlocal = m; 3696 } else { 3697 nlocal = n/size + ((n % size) > rank); 3698 } 3699 } else { 3700 nlocal = csize; 3701 } 3702 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3703 rstart = rend - nlocal; 3704 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3705 3706 /* next, compute all the lengths */ 3707 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3708 olens = dlens + m; 3709 for (i=0; i<m; i++) { 3710 jend = ii[i+1] - ii[i]; 3711 olen = 0; 3712 dlen = 0; 3713 for (j=0; j<jend; j++) { 3714 if (*jj < rstart || *jj >= rend) olen++; 3715 else dlen++; 3716 jj++; 3717 } 3718 olens[i] = olen; 3719 dlens[i] = dlen; 3720 } 3721 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3722 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3723 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3724 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3725 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3726 ierr = PetscFree(dlens);CHKERRQ(ierr); 3727 } else { 3728 PetscInt ml,nl; 3729 3730 M = *newmat; 3731 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3732 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3733 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3734 /* 3735 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3736 rather than the slower MatSetValues(). 3737 */ 3738 M->was_assembled = PETSC_TRUE; 3739 M->assembled = PETSC_FALSE; 3740 } 3741 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3742 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3743 ii = aij->i; 3744 jj = aij->j; 3745 aa = aij->a; 3746 for (i=0; i<m; i++) { 3747 row = rstart + i; 3748 nz = ii[i+1] - ii[i]; 3749 cwork = jj; jj += nz; 3750 vwork = aa; aa += nz; 3751 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3752 } 3753 3754 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3756 *newmat = M; 3757 3758 /* save submatrix used in processor for next request */ 3759 if (call == MAT_INITIAL_MATRIX) { 3760 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3761 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3762 } 3763 PetscFunctionReturn(0); 3764 } 3765 3766 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3767 { 3768 PetscInt m,cstart, cend,j,nnz,i,d; 3769 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3770 const PetscInt *JJ; 3771 PetscErrorCode ierr; 3772 PetscBool nooffprocentries; 3773 3774 PetscFunctionBegin; 3775 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3776 3777 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3778 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3779 m = B->rmap->n; 3780 cstart = B->cmap->rstart; 3781 cend = B->cmap->rend; 3782 rstart = B->rmap->rstart; 3783 3784 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3785 3786 if (PetscDefined(USE_DEBUG)) { 3787 for (i=0; i<m; i++) { 3788 nnz = Ii[i+1]- Ii[i]; 3789 JJ = J + Ii[i]; 3790 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3791 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3792 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3793 } 3794 } 3795 3796 for (i=0; i<m; i++) { 3797 nnz = Ii[i+1]- Ii[i]; 3798 JJ = J + Ii[i]; 3799 nnz_max = PetscMax(nnz_max,nnz); 3800 d = 0; 3801 for (j=0; j<nnz; j++) { 3802 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3803 } 3804 d_nnz[i] = d; 3805 o_nnz[i] = nnz - d; 3806 } 3807 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3808 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3809 3810 for (i=0; i<m; i++) { 3811 ii = i + rstart; 3812 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3813 } 3814 nooffprocentries = B->nooffprocentries; 3815 B->nooffprocentries = PETSC_TRUE; 3816 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3817 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3818 B->nooffprocentries = nooffprocentries; 3819 3820 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3821 PetscFunctionReturn(0); 3822 } 3823 3824 /*@ 3825 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3826 (the default parallel PETSc format). 3827 3828 Collective 3829 3830 Input Parameters: 3831 + B - the matrix 3832 . i - the indices into j for the start of each local row (starts with zero) 3833 . j - the column indices for each local row (starts with zero) 3834 - v - optional values in the matrix 3835 3836 Level: developer 3837 3838 Notes: 3839 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3840 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3841 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3842 3843 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3844 3845 The format which is used for the sparse matrix input, is equivalent to a 3846 row-major ordering.. i.e for the following matrix, the input data expected is 3847 as shown 3848 3849 $ 1 0 0 3850 $ 2 0 3 P0 3851 $ ------- 3852 $ 4 5 6 P1 3853 $ 3854 $ Process0 [P0]: rows_owned=[0,1] 3855 $ i = {0,1,3} [size = nrow+1 = 2+1] 3856 $ j = {0,0,2} [size = 3] 3857 $ v = {1,2,3} [size = 3] 3858 $ 3859 $ Process1 [P1]: rows_owned=[2] 3860 $ i = {0,3} [size = nrow+1 = 1+1] 3861 $ j = {0,1,2} [size = 3] 3862 $ v = {4,5,6} [size = 3] 3863 3864 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3865 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3866 @*/ 3867 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3868 { 3869 PetscErrorCode ierr; 3870 3871 PetscFunctionBegin; 3872 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3873 PetscFunctionReturn(0); 3874 } 3875 3876 /*@C 3877 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3878 (the default parallel PETSc format). For good matrix assembly performance 3879 the user should preallocate the matrix storage by setting the parameters 3880 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3881 performance can be increased by more than a factor of 50. 3882 3883 Collective 3884 3885 Input Parameters: 3886 + B - the matrix 3887 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3888 (same value is used for all local rows) 3889 . d_nnz - array containing the number of nonzeros in the various rows of the 3890 DIAGONAL portion of the local submatrix (possibly different for each row) 3891 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3892 The size of this array is equal to the number of local rows, i.e 'm'. 3893 For matrices that will be factored, you must leave room for (and set) 3894 the diagonal entry even if it is zero. 3895 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3896 submatrix (same value is used for all local rows). 3897 - o_nnz - array containing the number of nonzeros in the various rows of the 3898 OFF-DIAGONAL portion of the local submatrix (possibly different for 3899 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3900 structure. The size of this array is equal to the number 3901 of local rows, i.e 'm'. 3902 3903 If the *_nnz parameter is given then the *_nz parameter is ignored 3904 3905 The AIJ format (also called the Yale sparse matrix format or 3906 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3907 storage. The stored row and column indices begin with zero. 3908 See Users-Manual: ch_mat for details. 3909 3910 The parallel matrix is partitioned such that the first m0 rows belong to 3911 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3912 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3913 3914 The DIAGONAL portion of the local submatrix of a processor can be defined 3915 as the submatrix which is obtained by extraction the part corresponding to 3916 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3917 first row that belongs to the processor, r2 is the last row belonging to 3918 the this processor, and c1-c2 is range of indices of the local part of a 3919 vector suitable for applying the matrix to. This is an mxn matrix. In the 3920 common case of a square matrix, the row and column ranges are the same and 3921 the DIAGONAL part is also square. The remaining portion of the local 3922 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3923 3924 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3925 3926 You can call MatGetInfo() to get information on how effective the preallocation was; 3927 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3928 You can also run with the option -info and look for messages with the string 3929 malloc in them to see if additional memory allocation was needed. 3930 3931 Example usage: 3932 3933 Consider the following 8x8 matrix with 34 non-zero values, that is 3934 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3935 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3936 as follows: 3937 3938 .vb 3939 1 2 0 | 0 3 0 | 0 4 3940 Proc0 0 5 6 | 7 0 0 | 8 0 3941 9 0 10 | 11 0 0 | 12 0 3942 ------------------------------------- 3943 13 0 14 | 15 16 17 | 0 0 3944 Proc1 0 18 0 | 19 20 21 | 0 0 3945 0 0 0 | 22 23 0 | 24 0 3946 ------------------------------------- 3947 Proc2 25 26 27 | 0 0 28 | 29 0 3948 30 0 0 | 31 32 33 | 0 34 3949 .ve 3950 3951 This can be represented as a collection of submatrices as: 3952 3953 .vb 3954 A B C 3955 D E F 3956 G H I 3957 .ve 3958 3959 Where the submatrices A,B,C are owned by proc0, D,E,F are 3960 owned by proc1, G,H,I are owned by proc2. 3961 3962 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3963 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3964 The 'M','N' parameters are 8,8, and have the same values on all procs. 3965 3966 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3967 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3968 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3969 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3970 part as SeqAIJ matrices. 
For example, proc1 will store [E] as a SeqAIJ 3971 matrix, and [DF] as another SeqAIJ matrix. 3972 3973 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3974 allocated for every row of the local diagonal submatrix, and o_nz 3975 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3976 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 3977 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3978 In this case, the values of d_nz,o_nz are: 3979 .vb 3980 proc0 : d_nz = 2, o_nz = 2 3981 proc1 : d_nz = 3, o_nz = 2 3982 proc2 : d_nz = 1, o_nz = 4 3983 .ve 3984 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3985 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3986 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3987 34 values. 3988 3989 When the d_nnz, o_nnz parameters are specified, the storage is specified 3990 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3991 In the above case the values for d_nnz,o_nnz are: 3992 .vb 3993 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3994 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3995 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3996 .ve 3997 Here the space allocated is the sum of all the above values, i.e. 34, and 3998 hence the preallocation is perfect. 3999 4000 Level: intermediate 4001 4002 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4003 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4004 @*/ 4005 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4006 { 4007 PetscErrorCode ierr; 4008 4009 PetscFunctionBegin; 4010 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4011 PetscValidType(B,1); 4012 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4013 PetscFunctionReturn(0); 4014 } 4015 4016 /*@ 4017 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4018 CSR format. 4019 4020 Collective 4021 4022 Input Parameters: 4023 + comm - MPI communicator 4024 . m - number of local rows (Cannot be PETSC_DECIDE) 4025 . n - This value should be the same as the local size used in creating the 4026 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4027 calculated if N is given) For square matrices n is almost always m. 4028 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4029 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4030 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4031 . j - column indices 4032 - a - matrix values 4033 4034 Output Parameter: 4035 . mat - the matrix 4036 4037 Level: intermediate 4038 4039 Notes: 4040 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4041 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4042 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4043 4044 The i and j indices are 0 based, and the i indices are offsets into the local j array. 4045 4046 The format used for the sparse matrix input is equivalent to a 4047 row-major ordering,
i.e., for the following matrix the input data expected is 4048 as shown below. 4049 4050 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays(). 4051 4052 $ 1 0 0 4053 $ 2 0 3 P0 4054 $ ------- 4055 $ 4 5 6 P1 4056 $ 4057 $ Process0 [P0]: rows_owned=[0,1] 4058 $ i = {0,1,3} [size = nrow+1 = 2+1] 4059 $ j = {0,0,2} [size = 3] 4060 $ v = {1,2,3} [size = 3] 4061 $ 4062 $ Process1 [P1]: rows_owned=[2] 4063 $ i = {0,3} [size = nrow+1 = 1+1] 4064 $ j = {0,1,2} [size = 3] 4065 $ v = {4,5,6} [size = 3] 4066 4067 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4068 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4069 @*/ 4070 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4071 { 4072 PetscErrorCode ierr; 4073 4074 PetscFunctionBegin; 4075 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4076 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4077 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4078 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4079 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4080 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4081 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4082 PetscFunctionReturn(0); 4083 } 4084 4085 /*@ 4086 MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard 4087 CSR format. Only the numerical values are updated; the other arrays must be identical 4088 4089 Collective 4090 4091 Input Parameters: 4092 + mat - the matrix 4093 . m - number of local rows (Cannot be PETSC_DECIDE) 4094 . n - This value should be the same as the local size used in creating the 4095 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4096 calculated if N is given) For square matrices n is almost always m. 4097 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4098 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4099 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4100 .
J - column indices 4101 - v - matrix values 4102 4103 Level: intermediate 4104 4105 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4106 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4107 @*/ 4108 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4109 { 4110 PetscErrorCode ierr; 4111 PetscInt cstart,nnz,i,j; 4112 PetscInt *ld; 4113 PetscBool nooffprocentries; 4114 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4115 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4116 PetscScalar *ad = Ad->a, *ao = Ao->a; 4117 const PetscInt *Adi = Ad->i; 4118 PetscInt ldi,Iii,md; 4119 4120 PetscFunctionBegin; 4121 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0"); 4122 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4123 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change between calls to MatUpdateMPIAIJWithArrays()"); 4124 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change between calls to MatUpdateMPIAIJWithArrays()"); 4125 4126 cstart = mat->cmap->rstart; 4127 if (!Aij->ld) { 4128 /* count number of entries below block diagonal */ 4129 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4130 Aij->ld = ld; 4131 for (i=0; i<m; i++) { 4132 nnz = Ii[i+1]- Ii[i]; 4133 j = 0; 4134 while (j < nnz && J[j] < cstart) {j++;} 4135 J += nnz; 4136 ld[i] = j; 4137 } 4138 } else { 4139 ld = Aij->ld; 4140 } 4141 4142 for (i=0; i<m; i++) { 4143 nnz = Ii[i+1]- Ii[i]; 4144 Iii = Ii[i]; 4145 ldi = ld[i]; 4146 md = Adi[i+1]-Adi[i]; 4147 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4148 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4149 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4150 ad += md; 4151 ao += nnz - md; 4152 } 4153 nooffprocentries = mat->nooffprocentries; 4154 mat->nooffprocentries = PETSC_TRUE; 4155 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4156 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4157 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4158 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4159 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4160 mat->nooffprocentries = nooffprocentries; 4161 PetscFunctionReturn(0); 4162 } 4163 4164 /*@C 4165 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4166 (the default parallel PETSc format). For good matrix assembly performance 4167 the user should preallocate the matrix storage by setting the parameters 4168 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4169 performance can be increased by more than a factor of 50. 4170 4171 Collective 4172 4173 Input Parameters: 4174 + comm - MPI communicator 4175 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4176 This value should be the same as the local size used in creating the 4177 y vector for the matrix-vector product y = Ax. 4178 . n - This value should be the same as the local size used in creating the 4179 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4180 calculated if N is given) For square matrices n is almost always m. 4181 .
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4182 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4183 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4184 (same value is used for all local rows) 4185 . d_nnz - array containing the number of nonzeros in the various rows of the 4186 DIAGONAL portion of the local submatrix (possibly different for each row) 4187 or NULL, if d_nz is used to specify the nonzero structure. 4188 The size of this array is equal to the number of local rows, i.e. 'm'. 4189 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4190 submatrix (same value is used for all local rows). 4191 - o_nnz - array containing the number of nonzeros in the various rows of the 4192 OFF-DIAGONAL portion of the local submatrix (possibly different for 4193 each row) or NULL, if o_nz is used to specify the nonzero 4194 structure. The size of this array is equal to the number 4195 of local rows, i.e. 'm'. 4196 4197 Output Parameter: 4198 . A - the matrix 4199 4200 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4201 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4202 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4203 4204 Notes: 4205 If the *_nnz parameter is given then the *_nz parameter is ignored. 4206 4207 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4208 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4209 storage requirements for this matrix. 4210 4211 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4212 processor then it must be used on all processors that share the object for 4213 that argument. 4214 4215 The user MUST specify either the local or global matrix dimensions 4216 (possibly both). 4217 4218 The parallel matrix is partitioned across processors such that the 4219 first m0 rows belong to process 0, the next m1 rows belong to 4220 process 1, the next m2 rows belong to process 2, etc., where 4221 m0,m1,m2,... are the input parameter 'm', i.e. each processor stores 4222 values corresponding to an [m x N] submatrix. 4223 4224 The columns are logically partitioned with the n0 columns belonging 4225 to the 0th partition, the next n1 columns belonging to the next 4226 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4227 4228 The DIAGONAL portion of the local submatrix on any given processor 4229 is the submatrix formed by the rows and columns m,n 4230 owned by the given processor, i.e. the diagonal matrix on 4231 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4232 etc. The remaining portion of the local submatrix [m x (N-n)] 4233 constitutes the OFF-DIAGONAL portion. The example below better 4234 illustrates this concept. 4235 4236 For a square global matrix we define each processor's diagonal portion 4237 to be its local rows and the corresponding columns (a square submatrix); 4238 each processor's off-diagonal portion encompasses the remainder of the 4239 local matrix (a rectangular submatrix). 4240 4241 If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored. 4242 4243 When calling this routine with a single process communicator, a matrix of 4244 type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this 4245 type of communicator, use the construction mechanism: 4246 .vb 4247 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4248 .ve 4249 4255 By default, this format uses inodes (identical nodes) when possible. 4256 We search for consecutive rows with the same nonzero structure, thereby 4257 reusing matrix information to achieve increased efficiency. 4258 4259 Options Database Keys: 4260 + -mat_no_inode - Do not use inodes 4261 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4262 4263 4264 4265 Example usage: 4266 4267 Consider the following 8x8 matrix with 34 non-zero values, that is 4268 assembled across 3 processors. Let us assume that proc0 owns 3 rows, 4269 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4270 as follows: 4271 4272 .vb 4273 1 2 0 | 0 3 0 | 0 4 4274 Proc0 0 5 6 | 7 0 0 | 8 0 4275 9 0 10 | 11 0 0 | 12 0 4276 ------------------------------------- 4277 13 0 14 | 15 16 17 | 0 0 4278 Proc1 0 18 0 | 19 20 21 | 0 0 4279 0 0 0 | 22 23 0 | 24 0 4280 ------------------------------------- 4281 Proc2 25 26 27 | 0 0 28 | 29 0 4282 30 0 0 | 31 32 33 | 0 34 4283 .ve 4284 4285 This can be represented as a collection of submatrices as: 4286 4287 .vb 4288 A B C 4289 D E F 4290 G H I 4291 .ve 4292 4293 Where the submatrices A,B,C are owned by proc0, D,E,F are 4294 owned by proc1, G,H,I are owned by proc2. 4295 4296 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4297 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4298 The 'M','N' parameters are 8,8, and have the same values on all procs. 4299 4300 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4301 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4302 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4303 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4304 part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ 4305 matrix, and [DF] as another SeqAIJ matrix. 4306 4307 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4308 allocated for every row of the local diagonal submatrix, and o_nz 4309 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4310 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per local 4311 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4312 In this case, the values of d_nz,o_nz are: 4313 .vb 4314 proc0 : d_nz = 2, o_nz = 2 4315 proc1 : d_nz = 3, o_nz = 2 4316 proc2 : d_nz = 1, o_nz = 4 4317 .ve 4318 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4319 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4320 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4321 34 values. 4322 4323 When the d_nnz, o_nnz parameters are specified, the storage is specified 4324 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4325 In the above case the values for d_nnz,o_nnz are: 4326 .vb 4327 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4328 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4329 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4330 .ve 4331 Here the space allocated is the sum of all the above values, i.e. 34, and 4332 hence the preallocation is perfect.
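   As a sketch only (illustrative values; the MatSetValues() loops and error checking are omitted),
   the 8x8 example above could be assembled on 3 processes with m set to 3, 3, or 2 on each rank and
   d_nnz/o_nnz filled with the per-rank values listed above:

.vb
     Mat      A;
     PetscInt m = ...;
     PetscInt d_nnz[] = {...}, o_nnz[] = {...};

     MatCreateAIJ(PETSC_COMM_WORLD,m,m,8,8,0,d_nnz,0,o_nnz,&A);
     MatSetValues(A,...);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve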
   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@C
     MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

   Not collective

   Input Parameter:
.   A - The MPIAIJ matrix

   Output Parameters:
+   Ad - The local diagonal block as a SeqAIJ matrix
.   Ao - The local off-diagonal block as a SeqAIJ matrix
-   colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix

   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
   in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
   local column numbers to global column numbers in the original matrix.
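   For example, a minimal usage sketch (the variable names below are illustrative only) is
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
   after which column j of Ao corresponds to global column colmap[j] of the parallel matrix A.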
4375 4376 Level: intermediate 4377 4378 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4379 @*/ 4380 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4381 { 4382 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4383 PetscBool flg; 4384 PetscErrorCode ierr; 4385 4386 PetscFunctionBegin; 4387 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4388 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4389 if (Ad) *Ad = a->A; 4390 if (Ao) *Ao = a->B; 4391 if (colmap) *colmap = a->garray; 4392 PetscFunctionReturn(0); 4393 } 4394 4395 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4396 { 4397 PetscErrorCode ierr; 4398 PetscInt m,N,i,rstart,nnz,Ii; 4399 PetscInt *indx; 4400 PetscScalar *values; 4401 4402 PetscFunctionBegin; 4403 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4404 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4405 PetscInt *dnz,*onz,sum,bs,cbs; 4406 4407 if (n == PETSC_DECIDE) { 4408 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4409 } 4410 /* Check sum(n) = N */ 4411 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4412 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4413 4414 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4415 rstart -= m; 4416 4417 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4418 for (i=0; i<m; i++) { 4419 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4420 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4421 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4422 } 4423 4424 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4425 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4426 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4427 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4428 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4429 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4430 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4431 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4432 } 4433 4434 /* numeric phase */ 4435 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4436 for (i=0; i<m; i++) { 4437 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4438 Ii = i + rstart; 4439 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4440 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4441 } 4442 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4443 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4444 PetscFunctionReturn(0); 4445 } 4446 4447 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4448 { 4449 PetscErrorCode ierr; 4450 PetscMPIInt rank; 4451 PetscInt m,N,i,rstart,nnz; 4452 size_t len; 4453 const PetscInt *indx; 4454 PetscViewer out; 4455 char *name; 4456 Mat B; 4457 const PetscScalar *values; 4458 4459 PetscFunctionBegin; 4460 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4461 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4462 /* Should this be the type of the diagonal block of A? 
*/ 4463 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4464 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4465 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4466 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4467 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4468 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4469 for (i=0; i<m; i++) { 4470 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4471 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4472 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4473 } 4474 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4475 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4476 4477 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4478 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4479 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4480 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4481 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4482 ierr = PetscFree(name);CHKERRQ(ierr); 4483 ierr = MatView(B,out);CHKERRQ(ierr); 4484 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4485 ierr = MatDestroy(&B);CHKERRQ(ierr); 4486 PetscFunctionReturn(0); 4487 } 4488 4489 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4490 { 4491 PetscErrorCode ierr; 4492 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4493 4494 PetscFunctionBegin; 4495 if (!merge) PetscFunctionReturn(0); 4496 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4508 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4509 ierr = PetscFree(merge);CHKERRQ(ierr); 4510 PetscFunctionReturn(0); 4511 } 4512 4513 #include <../src/mat/utils/freespace.h> 4514 #include <petscbt.h> 4515 4516 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4517 { 4518 PetscErrorCode ierr; 4519 MPI_Comm comm; 4520 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4521 PetscMPIInt size,rank,taga,*len_s; 4522 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4523 PetscInt proc,m; 4524 PetscInt **buf_ri,**buf_rj; 4525 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4526 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4527 MPI_Request *s_waits,*r_waits; 4528 MPI_Status *status; 4529 MatScalar *aa=a->a; 4530 MatScalar **abuf_r,*ba_i; 4531 Mat_Merge_SeqsToMPI *merge; 4532 PetscContainer container; 4533 4534 PetscFunctionBegin; 4535 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4536 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4537 4538 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4539 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4540 4541 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4542 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4543 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4544 4545 bi = merge->bi; 4546 bj = merge->bj; 4547 buf_ri = merge->buf_ri; 4548 buf_rj = merge->buf_rj; 4549 4550 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4551 owners = merge->rowmap->range; 4552 len_s = merge->len_s; 4553 4554 /* send and recv matrix values */ 4555 /*-----------------------------*/ 4556 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4557 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4558 4559 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4560 for (proc=0,k=0; proc<size; proc++) { 4561 if (!len_s[proc]) continue; 4562 i = owners[proc]; 4563 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4564 k++; 4565 } 4566 4567 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4568 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4569 ierr = PetscFree(status);CHKERRQ(ierr); 4570 4571 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4572 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4573 4574 /* insert mat values of mpimat */ 4575 /*----------------------------*/ 4576 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4577 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4578 4579 for (k=0; k<merge->nrecv; k++) { 4580 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4581 nrows = *(buf_ri_k[k]); 4582 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4583 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4584 } 4585 4586 /* set values of ba */ 4587 m = merge->rowmap->n; 4588 for (i=0; i<m; i++) { 4589 arow = owners[rank] + i; 4590 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4591 bnzi = bi[i+1] - bi[i]; 4592 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4593 4594 /* add local non-zero vals of this proc's seqmat into ba */ 4595 anzi = ai[arow+1] - ai[arow]; 4596 aj = a->j + ai[arow]; 4597 aa = a->a + ai[arow]; 4598 nextaj = 0; 4599 for (j=0; nextaj<anzi; j++) { 4600 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4601 ba_i[j] += aa[nextaj++]; 4602 } 4603 } 4604 4605 /* add received vals into ba */ 4606 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4607 /* i-th row */ 4608 if (i == *nextrow[k]) { 4609 anzi = *(nextai[k]+1) - *nextai[k]; 4610 aj = buf_rj[k] + *(nextai[k]); 4611 aa = abuf_r[k] + *(nextai[k]); 4612 nextaj = 0; 4613 for (j=0; nextaj<anzi; j++) { 4614 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4615 ba_i[j] += aa[nextaj++]; 4616 } 4617 } 4618 nextrow[k]++; nextai[k]++; 4619 } 4620 } 4621 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4622 } 4623 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4625 4626 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4627 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4628 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4629 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4630 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4631 PetscFunctionReturn(0); 4632 } 4633 4634 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4635 { 4636 PetscErrorCode ierr; 4637 Mat B_mpi; 4638 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4639 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4640 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4641 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4642 PetscInt len,proc,*dnz,*onz,bs,cbs; 4643 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4644 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4645 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4646 MPI_Status *status; 4647 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4648 PetscBT lnkbt; 4649 Mat_Merge_SeqsToMPI *merge; 4650 PetscContainer container; 4651 4652 PetscFunctionBegin; 4653 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4654 4655 /* make sure it is a PETSc comm */ 4656 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4658 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4659 4660 ierr = PetscNew(&merge);CHKERRQ(ierr); 4661 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4662 4663 /* determine row ownership */ 4664 /*---------------------------------------------------------*/ 4665 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4670 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4671 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4672 4673 m = merge->rowmap->n; 4674 owners = merge->rowmap->range; 4675 4676 /* determine the number of messages to send, their lengths */ 4677 /*---------------------------------------------------------*/ 4678 len_s = merge->len_s; 4679 4680 len = 0; /* length of buf_si[] */ 4681 merge->nsend = 0; 4682 for (proc=0; proc<size; proc++) { 4683 len_si[proc] = 0; 4684 if (proc == rank) { 4685 len_s[proc] = 0; 4686 } else { 4687 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4688 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4689 } 4690 if (len_s[proc]) { 4691 merge->nsend++; 4692 nrows = 0; 4693 for (i=owners[proc]; i<owners[proc+1]; i++) { 4694 if (ai[i+1] > ai[i]) nrows++; 4695 } 4696 len_si[proc] = 2*(nrows+1); 4697 len += len_si[proc]; 4698 } 4699 } 4700 4701 /* determine the number and length of messages to receive for ij-structure */ 4702 /*-------------------------------------------------------------------------*/ 4703 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4704 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4705 4706 /* post the Irecv of j-structure */ 4707 /*-------------------------------*/ 4708 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4709 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4710 4711 /* post the Isend of j-structure */ 4712 /*--------------------------------*/ 4713 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4714 4715 for (proc=0, k=0; proc<size; proc++) { 4716 if (!len_s[proc]) continue; 4717 i = owners[proc]; 4718 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4719 k++; 4720 } 4721 4722 /* receives and sends of j-structure are complete */ 4723 /*------------------------------------------------*/ 
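  /* Note: the 'status' array allocated above has length 'size', which is sufficient for the
     Waitall calls below since merge->nrecv and merge->nsend are each at most size-1 */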
4724 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4725 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4726 4727 /* send and recv i-structure */ 4728 /*---------------------------*/ 4729 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4730 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4731 4732 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4733 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4734 for (proc=0,k=0; proc<size; proc++) { 4735 if (!len_s[proc]) continue; 4736 /* form outgoing message for i-structure: 4737 buf_si[0]: nrows to be sent 4738 [1:nrows]: row index (global) 4739 [nrows+1:2*nrows+1]: i-structure index 4740 */ 4741 /*-------------------------------------------*/ 4742 nrows = len_si[proc]/2 - 1; 4743 buf_si_i = buf_si + nrows+1; 4744 buf_si[0] = nrows; 4745 buf_si_i[0] = 0; 4746 nrows = 0; 4747 for (i=owners[proc]; i<owners[proc+1]; i++) { 4748 anzi = ai[i+1] - ai[i]; 4749 if (anzi) { 4750 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4751 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4752 nrows++; 4753 } 4754 } 4755 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4756 k++; 4757 buf_si += len_si[proc]; 4758 } 4759 4760 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4761 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4762 4763 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4764 for (i=0; i<merge->nrecv; i++) { 4765 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4766 } 4767 4768 ierr = PetscFree(len_si);CHKERRQ(ierr); 4769 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4770 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4771 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4772 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4773 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4774 ierr = PetscFree(status);CHKERRQ(ierr); 4775 4776 /* compute a local seq matrix in each processor */ 4777 /*----------------------------------------------*/ 4778 /* allocate bi array and free space for accumulating nonzero column info */ 4779 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4780 bi[0] = 0; 4781 4782 /* create and initialize a linked list */ 4783 nlnk = N+1; 4784 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4785 4786 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4787 len = ai[owners[rank+1]] - ai[owners[rank]]; 4788 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4789 4790 current_space = free_space; 4791 4792 /* determine symbolic info for each local row */ 4793 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4794 4795 for (k=0; k<merge->nrecv; k++) { 4796 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4797 nrows = *buf_ri_k[k]; 4798 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4799 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4800 } 4801 4802 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4803 len = 0; 4804 for (i=0; i<m; i++) { 4805 bnzi = 0; 4806 /* add local non-zero cols of this proc's seqmat into lnk */ 4807 arow = owners[rank] + i; 4808 anzi = 
ai[arow+1] - ai[arow]; 4809 aj = a->j + ai[arow]; 4810 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4811 bnzi += nlnk; 4812 /* add received col data into lnk */ 4813 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4814 if (i == *nextrow[k]) { /* i-th row */ 4815 anzi = *(nextai[k]+1) - *nextai[k]; 4816 aj = buf_rj[k] + *nextai[k]; 4817 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4818 bnzi += nlnk; 4819 nextrow[k]++; nextai[k]++; 4820 } 4821 } 4822 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4823 4824 /* if free space is not available, make more free space */ 4825 if (current_space->local_remaining<bnzi) { 4826 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4827 nspacedouble++; 4828 } 4829 /* copy data into free space, then initialize lnk */ 4830 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4831 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4832 4833 current_space->array += bnzi; 4834 current_space->local_used += bnzi; 4835 current_space->local_remaining -= bnzi; 4836 4837 bi[i+1] = bi[i] + bnzi; 4838 } 4839 4840 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4841 4842 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4843 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4844 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4845 4846 /* create symbolic parallel matrix B_mpi */ 4847 /*---------------------------------------*/ 4848 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4849 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4850 if (n==PETSC_DECIDE) { 4851 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4852 } else { 4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4854 } 4855 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4856 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4857 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4858 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4859 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4860 4861 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4862 B_mpi->assembled = PETSC_FALSE; 4863 merge->bi = bi; 4864 merge->bj = bj; 4865 merge->buf_ri = buf_ri; 4866 merge->buf_rj = buf_rj; 4867 merge->coi = NULL; 4868 merge->coj = NULL; 4869 merge->owners_co = NULL; 4870 4871 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4872 4873 /* attach the supporting struct to B_mpi for reuse */ 4874 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4875 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4876 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4877 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4878 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4879 *mpimat = B_mpi; 4880 4881 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4882 PetscFunctionReturn(0); 4883 } 4884 4885 /*@C 4886 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4887 matrices from each processor 4888 4889 Collective 4890 4891 Input Parameters: 4892 + comm - the communicators the parallel matrix will live on 4893 . seqmat - the input sequential matrices 4894 . m - number of local rows (or PETSC_DECIDE) 4895 . 
n - number of local columns (or PETSC_DECIDE) 4896 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4897 4898 Output Parameter: 4899 . mpimat - the parallel matrix generated 4900 4901 Level: advanced 4902 4903 Notes: 4904 The dimensions of the sequential matrix in each processor MUST be the same. 4905 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4906 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4907 @*/ 4908 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4909 { 4910 PetscErrorCode ierr; 4911 PetscMPIInt size; 4912 4913 PetscFunctionBegin; 4914 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4915 if (size == 1) { 4916 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4917 if (scall == MAT_INITIAL_MATRIX) { 4918 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4919 } else { 4920 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4921 } 4922 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4923 PetscFunctionReturn(0); 4924 } 4925 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 if (scall == MAT_INITIAL_MATRIX) { 4927 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4928 } 4929 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4930 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4931 PetscFunctionReturn(0); 4932 } 4933 4934 /*@ 4935 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4936 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4937 with MatGetSize() 4938 4939 Not Collective 4940 4941 Input Parameters: 4942 + A - the matrix 4943 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4944 4945 Output Parameter: 4946 . A_loc - the local sequential matrix generated 4947 4948 Level: developer 4949 4950 Notes: 4951 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4952 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4953 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4954 modify the values of the returned A_loc. 
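   A minimal usage sketch (illustrative only; error checking omitted) is
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
     ... change values in A, keeping the same nonzero pattern ...
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
     MatDestroy(&A_loc);
.ve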
4955 4956 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4957 4958 @*/ 4959 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4960 { 4961 PetscErrorCode ierr; 4962 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4963 Mat_SeqAIJ *mat,*a,*b; 4964 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4965 MatScalar *aa,*ba,*cam; 4966 PetscScalar *ca; 4967 PetscMPIInt size; 4968 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4969 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4970 PetscBool match; 4971 4972 PetscFunctionBegin; 4973 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4974 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4975 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4976 if (size == 1) { 4977 if (scall == MAT_INITIAL_MATRIX) { 4978 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4979 *A_loc = mpimat->A; 4980 } else if (scall == MAT_REUSE_MATRIX) { 4981 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4982 } 4983 PetscFunctionReturn(0); 4984 } 4985 4986 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4987 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4988 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4989 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4990 aa = a->a; ba = b->a; 4991 if (scall == MAT_INITIAL_MATRIX) { 4992 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4993 ci[0] = 0; 4994 for (i=0; i<am; i++) { 4995 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4996 } 4997 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4998 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4999 k = 0; 5000 for (i=0; i<am; i++) { 5001 ncols_o = bi[i+1] - bi[i]; 5002 ncols_d = ai[i+1] - ai[i]; 5003 /* off-diagonal portion of A */ 5004 for (jo=0; jo<ncols_o; jo++) { 5005 col = cmap[*bj]; 5006 if (col >= cstart) break; 5007 cj[k] = col; bj++; 5008 ca[k++] = *ba++; 5009 } 5010 /* diagonal portion of A */ 5011 for (j=0; j<ncols_d; j++) { 5012 cj[k] = cstart + *aj++; 5013 ca[k++] = *aa++; 5014 } 5015 /* off-diagonal portion of A */ 5016 for (j=jo; j<ncols_o; j++) { 5017 cj[k] = cmap[*bj++]; 5018 ca[k++] = *ba++; 5019 } 5020 } 5021 /* put together the new matrix */ 5022 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5023 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5024 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5025 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5026 mat->free_a = PETSC_TRUE; 5027 mat->free_ij = PETSC_TRUE; 5028 mat->nonew = 0; 5029 } else if (scall == MAT_REUSE_MATRIX) { 5030 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5031 ci = mat->i; cj = mat->j; cam = mat->a; 5032 for (i=0; i<am; i++) { 5033 /* off-diagonal portion of A */ 5034 ncols_o = bi[i+1] - bi[i]; 5035 for (jo=0; jo<ncols_o; jo++) { 5036 col = cmap[*bj]; 5037 if (col >= cstart) break; 5038 *cam++ = *ba++; bj++; 5039 } 5040 /* diagonal portion of A */ 5041 ncols_d = ai[i+1] - ai[i]; 5042 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5043 /* off-diagonal portion of A */ 5044 for (j=jo; j<ncols_o; j++) { 5045 *cam++ = *ba++; bj++; 5046 } 5047 } 5048 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5049 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5050 PetscFunctionReturn(0); 5051 } 5052 5053 /*@C 5054 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5055 5056 Not Collective 5057 5058 Input Parameters: 5059 + A - the matrix 5060 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5061 - row, col - index sets of rows and columns to extract (or NULL) 5062 5063 Output Parameter: 5064 . A_loc - the local sequential matrix generated 5065 5066 Level: developer 5067 5068 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5069 5070 @*/ 5071 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5072 { 5073 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5074 PetscErrorCode ierr; 5075 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5076 IS isrowa,iscola; 5077 Mat *aloc; 5078 PetscBool match; 5079 5080 PetscFunctionBegin; 5081 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5082 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5083 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5084 if (!row) { 5085 start = A->rmap->rstart; end = A->rmap->rend; 5086 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5087 } else { 5088 isrowa = *row; 5089 } 5090 if (!col) { 5091 start = A->cmap->rstart; 5092 cmap = a->garray; 5093 nzA = a->A->cmap->n; 5094 nzB = a->B->cmap->n; 5095 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5096 ncols = 0; 5097 for (i=0; i<nzB; i++) { 5098 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5099 else break; 5100 } 5101 imark = i; 5102 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5103 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5104 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5105 } else { 5106 iscola = *col; 5107 } 5108 if (scall != MAT_INITIAL_MATRIX) { 5109 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5110 aloc[0] = *A_loc; 5111 } 5112 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5113 if (!col) { /* attach global id of condensed columns */ 5114 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5115 } 5116 *A_loc = aloc[0]; 5117 ierr = PetscFree(aloc);CHKERRQ(ierr); 5118 if (!row) { 5119 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5120 } 5121 if (!col) { 5122 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5123 } 5124 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5125 PetscFunctionReturn(0); 5126 } 5127 5128 /* 5129 * Create a sequential AIJ matrix 
based on row indices. a whole column is extracted once a row is matched. 5130 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5131 * on a global size. 5132 * */ 5133 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5134 { 5135 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5136 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5137 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5138 PetscMPIInt owner; 5139 PetscSFNode *iremote,*oiremote; 5140 const PetscInt *lrowindices; 5141 PetscErrorCode ierr; 5142 PetscSF sf,osf; 5143 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5144 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5145 MPI_Comm comm; 5146 ISLocalToGlobalMapping mapping; 5147 5148 PetscFunctionBegin; 5149 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5150 /* plocalsize is the number of roots 5151 * nrows is the number of leaves 5152 * */ 5153 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5154 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5155 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5156 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5157 for (i=0;i<nrows;i++) { 5158 /* Find a remote index and an owner for a row 5159 * The row could be local or remote 5160 * */ 5161 owner = 0; 5162 lidx = 0; 5163 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5164 iremote[i].index = lidx; 5165 iremote[i].rank = owner; 5166 } 5167 /* Create SF to communicate how many nonzero columns for each row */ 5168 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5169 /* SF will figure out the number of nonzero colunms for each row, and their 5170 * offsets 5171 * */ 5172 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5173 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5174 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5175 5176 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5177 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5178 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5179 roffsets[0] = 0; 5180 roffsets[1] = 0; 5181 for (i=0;i<plocalsize;i++) { 5182 /* diag */ 5183 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5184 /* off diag */ 5185 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5186 /* compute offsets so that we relative location for each row */ 5187 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5188 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5189 } 5190 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5191 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5192 /* 'r' means root, and 'l' means leaf */ 5193 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5194 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5195 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5196 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5197 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5198 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5199 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5200 dntotalcols = 0; 5201 ontotalcols = 0; 5202 ncol = 0; 5203 for (i=0;i<nrows;i++) { 5204 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5205 ncol = PetscMax(pnnz[i],ncol); 5206 /* diag */ 5207 dntotalcols += nlcols[i*2+0]; 5208 /* off diag */ 5209 ontotalcols += nlcols[i*2+1]; 5210 } 5211 /* We do not need to figure the right number of columns 5212 * since all the 
calculations will be done by going through the raw data 5213 * */ 5214 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5215 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5216 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5217 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5218 /* diag */ 5219 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5220 /* off diag */ 5221 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5222 /* diag */ 5223 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5224 /* off diag */ 5225 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5226 dntotalcols = 0; 5227 ontotalcols = 0; 5228 ntotalcols = 0; 5229 for (i=0;i<nrows;i++) { 5230 owner = 0; 5231 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5232 /* Set iremote for diag matrix */ 5233 for (j=0;j<nlcols[i*2+0];j++) { 5234 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5235 iremote[dntotalcols].rank = owner; 5236 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5237 ilocal[dntotalcols++] = ntotalcols++; 5238 } 5239 /* off diag */ 5240 for (j=0;j<nlcols[i*2+1];j++) { 5241 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5242 oiremote[ontotalcols].rank = owner; 5243 oilocal[ontotalcols++] = ntotalcols++; 5244 } 5245 } 5246 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5247 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5248 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5249 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5250 /* P serves as roots and P_oth is leaves 5251 * Diag matrix 5252 * */ 5253 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5254 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5255 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5256 5257 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5258 /* Off diag */ 5259 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5260 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5261 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5262 /* We operate on the matrix internal data for saving memory */ 5263 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5264 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5265 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5266 /* Convert to global indices for diag matrix */ 5267 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5268 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5269 /* We want P_oth store global indices */ 5270 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5271 /* Use memory scalable approach */ 5272 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5273 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5274 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5275 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5276 /* Convert back to local indices */ 5277 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5278 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5279 nout = 0; 5280 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5281 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5282 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5283 /* Exchange values */ 5284 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5285 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5286 /* Stop PETSc from shrinking memory */ 5287 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5288 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5289 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5290 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5291 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5292 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5293 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5294 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5295 PetscFunctionReturn(0); 5296 } 5297 5298 /* 5299 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5300 * This supports MPIAIJ and MAIJ 5301 * */ 5302 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5303 { 5304 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5305 Mat_SeqAIJ *p_oth; 5306 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5307 IS rows,map; 5308 PetscHMapI hamp; 5309 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5310 MPI_Comm comm; 5311 PetscSF sf,osf; 5312 PetscBool has; 5313 PetscErrorCode ierr; 5314 5315 PetscFunctionBegin; 5316 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5317 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5318 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5319 * and then create a submatrix (that often is an overlapping matrix) 5320 * */ 5321 if (reuse == MAT_INITIAL_MATRIX) { 5322 /* Use a hash table to figure out unique keys */ 5323 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5324 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5325 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5326 count = 0; 5327 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5328 for (i=0;i<a->B->cmap->n;i++) { 5329 key = a->garray[i]/dof; 5330 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5331 if (!has) { 5332 mapping[i] = count; 5333 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5334 } else { 5335 /* Current 'i' has the same value the previous step */ 5336 mapping[i] = count-1; 5337 } 5338 } 5339 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5340 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5341 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5342 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5343 off = 0; 5344 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5345 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5346 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5347 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5348 /* In case, the matrix was already created but users want to recreate the matrix */ 5349 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5350 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5351 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5352 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5353 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5354 } else if (reuse == MAT_REUSE_MATRIX) { 5355 /* If matrix was already created, we simply update values using SF objects 5356 * that as attached to the matrix ealier. 5357 * */ 5358 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5359 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5360 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5361 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5362 /* Update values in place */ 5363 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5364 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5365 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5366 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5367 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5368 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5369 PetscFunctionReturn(0); 5370 } 5371 5372 /*@C 5373 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5374 5375 Collective on Mat 5376 5377 Input Parameters: 5378 + A,B - the matrices in mpiaij format 5379 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5380 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5381 5382 Output Parameter: 5383 + rowb, colb - index sets of rows and columns of B to extract 5384 - B_seq - the sequential matrix generated 5385 5386 Level: developer 5387 5388 @*/ 5389 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5390 { 5391 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5392 PetscErrorCode ierr; 5393 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5394 IS isrowb,iscolb; 5395 Mat *bseq=NULL; 5396 5397 PetscFunctionBegin; 5398 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5399 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5400 } 5401 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5402 5403 if (scall == MAT_INITIAL_MATRIX) { 5404 start = A->cmap->rstart; 5405 cmap = a->garray; 5406 nzA = a->A->cmap->n; 5407 nzB = a->B->cmap->n; 5408 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5409 ncols = 0; 5410 for (i=0; i<nzB; i++) { /* row < local row index */ 5411 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5412 else break; 5413 } 5414 imark = i; 5415 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5416 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5417 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5418 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5419 } else { 5420 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5421 isrowb = *rowb; iscolb = *colb; 5422 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5423 bseq[0] = *B_seq; 5424 } 5425 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5426 *B_seq = bseq[0]; 5427 ierr = PetscFree(bseq);CHKERRQ(ierr); 5428 if (!rowb) { 5429 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5430 } else { 5431 *rowb = isrowb; 5432 } 5433 if (!colb) { 5434 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5435 } 
else { 5436 *colb = iscolb; 5437 } 5438 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5439 PetscFunctionReturn(0); 5440 } 5441 5442 /* 5443 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5444 of the OFF-DIAGONAL portion of local A 5445 5446 Collective on Mat 5447 5448 Input Parameters: 5449 + A,B - the matrices in mpiaij format 5450 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5451 5452 Output Parameter: 5453 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5454 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5455 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5456 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5457 5458 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5459 for this matrix. This is not desirable.. 5460 5461 Level: developer 5462 5463 */ 5464 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5465 { 5466 PetscErrorCode ierr; 5467 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5468 Mat_SeqAIJ *b_oth; 5469 VecScatter ctx; 5470 MPI_Comm comm; 5471 const PetscMPIInt *rprocs,*sprocs; 5472 const PetscInt *srow,*rstarts,*sstarts; 5473 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5474 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5475 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5476 MPI_Request *rwaits = NULL,*swaits = NULL; 5477 MPI_Status rstatus; 5478 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5479 5480 PetscFunctionBegin; 5481 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5482 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5483 5484 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5485 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5486 } 5487 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5488 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5489 5490 if (size == 1) { 5491 startsj_s = NULL; 5492 bufa_ptr = NULL; 5493 *B_oth = NULL; 5494 PetscFunctionReturn(0); 5495 } 5496 5497 ctx = a->Mvctx; 5498 tag = ((PetscObject)ctx)->tag; 5499 5500 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5501 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5502 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5503 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5504 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5505 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5506 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5507 5508 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5509 if (scall == MAT_INITIAL_MATRIX) { 5510 /* i-array */ 5511 /*---------*/ 5512 /* post receives */ 5513 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL 
when nrecvs=0 */ 5514 for (i=0; i<nrecvs; i++) { 5515 rowlen = rvalues + rstarts[i]*rbs; 5516 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5517 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5518 } 5519 5520 /* pack the outgoing message */ 5521 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5522 5523 sstartsj[0] = 0; 5524 rstartsj[0] = 0; 5525 len = 0; /* total length of j or a array to be sent */ 5526 if (nsends) { 5527 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5528 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5529 } 5530 for (i=0; i<nsends; i++) { 5531 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5532 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5533 for (j=0; j<nrows; j++) { 5534 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5535 for (l=0; l<sbs; l++) { 5536 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5537 5538 rowlen[j*sbs+l] = ncols; 5539 5540 len += ncols; 5541 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5542 } 5543 k++; 5544 } 5545 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5546 5547 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5548 } 5549 /* recvs and sends of i-array are completed */ 5550 i = nrecvs; 5551 while (i--) { 5552 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5553 } 5554 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5555 ierr = PetscFree(svalues);CHKERRQ(ierr); 5556 5557 /* allocate buffers for sending j and a arrays */ 5558 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5559 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5560 5561 /* create i-array of B_oth */ 5562 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5563 5564 b_othi[0] = 0; 5565 len = 0; /* total length of j or a array to be received */ 5566 k = 0; 5567 for (i=0; i<nrecvs; i++) { 5568 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5569 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5570 for (j=0; j<nrows; j++) { 5571 b_othi[k+1] = b_othi[k] + rowlen[j]; 5572 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5573 k++; 5574 } 5575 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5576 } 5577 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5578 5579 /* allocate space for j and a arrrays of B_oth */ 5580 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5581 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5582 5583 /* j-array */ 5584 /*---------*/ 5585 /* post receives of j-array */ 5586 for (i=0; i<nrecvs; i++) { 5587 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5588 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5589 } 5590 5591 /* pack the outgoing message j-array */ 5592 if (nsends) k = sstarts[0]; 5593 for (i=0; i<nsends; i++) { 5594 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5595 bufJ = bufj+sstartsj[i]; 5596 for (j=0; j<nrows; j++) { 5597 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5598 for (ll=0; ll<sbs; ll++) { 5599 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5600 for (l=0; l<ncols; l++) { 5601 *bufJ++ = cols[l]; 5602 } 5603 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5604 } 
5605 } 5606 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5607 } 5608 5609 /* recvs and sends of j-array are completed */ 5610 i = nrecvs; 5611 while (i--) { 5612 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5613 } 5614 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5615 } else if (scall == MAT_REUSE_MATRIX) { 5616 sstartsj = *startsj_s; 5617 rstartsj = *startsj_r; 5618 bufa = *bufa_ptr; 5619 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5620 b_otha = b_oth->a; 5621 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5622 5623 /* a-array */ 5624 /*---------*/ 5625 /* post receives of a-array */ 5626 for (i=0; i<nrecvs; i++) { 5627 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5628 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5629 } 5630 5631 /* pack the outgoing message a-array */ 5632 if (nsends) k = sstarts[0]; 5633 for (i=0; i<nsends; i++) { 5634 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5635 bufA = bufa+sstartsj[i]; 5636 for (j=0; j<nrows; j++) { 5637 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5638 for (ll=0; ll<sbs; ll++) { 5639 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5640 for (l=0; l<ncols; l++) { 5641 *bufA++ = vals[l]; 5642 } 5643 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5644 } 5645 } 5646 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5647 } 5648 /* recvs and sends of a-array are completed */ 5649 i = nrecvs; 5650 while (i--) { 5651 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5652 } 5653 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5654 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5655 5656 if (scall == MAT_INITIAL_MATRIX) { 5657 /* put together the new matrix */ 5658 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5659 5660 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5661 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5662 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5663 b_oth->free_a = PETSC_TRUE; 5664 b_oth->free_ij = PETSC_TRUE; 5665 b_oth->nonew = 0; 5666 5667 ierr = PetscFree(bufj);CHKERRQ(ierr); 5668 if (!startsj_s || !bufa_ptr) { 5669 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5670 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5671 } else { 5672 *startsj_s = sstartsj; 5673 *startsj_r = rstartsj; 5674 *bufa_ptr = bufa; 5675 } 5676 } 5677 5678 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5679 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5680 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5681 PetscFunctionReturn(0); 5682 } 5683 5684 /*@C 5685 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5686 5687 Not Collective 5688 5689 Input Parameters: 5690 . A - The matrix in mpiaij format 5691 5692 Output Parameter: 5693 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5694 . 
colmap - A map from global column index to local index into lvec 5695 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5696 5697 Level: developer 5698 5699 @*/ 5700 #if defined(PETSC_USE_CTABLE) 5701 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5702 #else 5703 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5704 #endif 5705 { 5706 Mat_MPIAIJ *a; 5707 5708 PetscFunctionBegin; 5709 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5710 PetscValidPointer(lvec, 2); 5711 PetscValidPointer(colmap, 3); 5712 PetscValidPointer(multScatter, 4); 5713 a = (Mat_MPIAIJ*) A->data; 5714 if (lvec) *lvec = a->lvec; 5715 if (colmap) *colmap = a->colmap; 5716 if (multScatter) *multScatter = a->Mvctx; 5717 PetscFunctionReturn(0); 5718 } 5719 5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5722 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5723 #if defined(PETSC_HAVE_MKL_SPARSE) 5724 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5725 #endif 5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5727 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5728 #if defined(PETSC_HAVE_ELEMENTAL) 5729 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5730 #endif 5731 #if defined(PETSC_HAVE_SCALAPACK) 5732 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5733 #endif 5734 #if defined(PETSC_HAVE_HYPRE) 5735 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5736 #endif 5737 #if defined(PETSC_HAVE_CUDA) 5738 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5739 #endif 5740 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5741 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5742 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5743 5744 /* 5745 Computes (B'*A')' since computing B*A directly is untenable 5746 5747 n p p 5748 ( ) ( ) ( ) 5749 m ( A ) * n ( B ) = m ( C ) 5750 ( ) ( ) ( ) 5751 5752 */ 5753 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5754 { 5755 PetscErrorCode ierr; 5756 Mat At,Bt,Ct; 5757 5758 PetscFunctionBegin; 5759 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5760 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5761 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5762 ierr = MatDestroy(&At);CHKERRQ(ierr); 5763 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5764 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5765 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5766 PetscFunctionReturn(0); 5767 } 5768 5769 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5770 { 5771 PetscErrorCode ierr; 5772 PetscBool cisdense; 5773 5774 PetscFunctionBegin; 5775 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5776 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5777 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5778 ierr = 
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscBool      cisdense;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
   in which case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.
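   Example (a minimal sketch, not part of the original manual page; m, n and the preallocation counts are
   illustrative placeholders):
.vb
  Mat A;
  ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); /* 5 diagonal-block and 2 off-diagonal-block nonzeros per row */
  /* ... MatSetValues(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve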

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off-processor entries formed during MatSetValues() */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
     and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The i, j, a, oi, oj, and oa arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
   must free the arrays once the matrix has been destroyed and not before.

   The i and j indices are 0 based

   See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
   communication if it is known that only local entries will be set.
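   Example usage (a minimal sketch, not part of the original manual page; each process contributes one local
   row with a single entry in its "diagonal" block and an empty "off-diagonal" block):
.vb
  PetscInt    i[]  = {0,1}, j[]  = {0};
  PetscScalar a[]  = {2.0};
  PetscInt    oi[] = {0,0}, oj[] = {0};   /* oj and oa are dummies since there are no off-diagonal entries */
  PetscScalar oa[] = {0.0};
  Mat         A;

  ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
  /* the arrays must remain valid until after MatDestroy(&A) because they are not copied */
.ve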

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart     = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
      if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
          } else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
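/*
   The Fortran kernel above mirrors what MatSetValues() does for a MATMPIAIJ matrix; from C the same insertion
   is expressed as follows (a minimal sketch, assuming mat is a preallocated MATMPIAIJ matrix):

     PetscInt    row = 0, col = 0;
     PetscScalar v   = 1.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/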