#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the implementation
   also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
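/*
   Illustrative usage of the MATAIJ type documented above (a hedged sketch, not part of this file's
   logic; the matrix size and stencil are made-up values and error checking is omitted). Following
   the recommendation in the man page, both preallocation routines are called so the same code works
   on one or many MPI processes:

     Mat      A;
     PetscInt i,rstart,rend;

     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
     MatSetFromOptions(A);                          (-mat_type aij selects this type)
     MatSeqAIJSetPreallocation(A,3,NULL);           (used on a single-process communicator)
     MatMPIAIJSetPreallocation(A,3,NULL,1,NULL);    (used on multi-process communicators)
     MatGetOwnershipRange(A,&rstart,&rend);
     for (i=rstart; i<rend; i++) {
       PetscInt    ncols = 0,cols[3];
       PetscScalar vals[3];
       if (i > 0)  {cols[ncols] = i-1; vals[ncols++] = -1.0;}
       cols[ncols] = i; vals[ncols++] = 2.0;
       if (i < 99) {cols[ncols] = i+1; vals[ncols++] = -1.0;}
       MatSetValues(A,1,&i,ncols,cols,vals,INSERT_VALUES);
     }
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
*/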
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
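/*
   Note (added for clarity): after the MPIU_Allreduce above, each entry of norms[] holds, for
   NORM_2, the global sum of |a_ij|^2 over its column (per-process sums of squares were combined
   across ranks), so the loop below finishes the computation by taking square roots. Callers reach
   this routine through MatGetColumnNorms(), which fills a PetscReal array whose length is the
   global number of columns on every process.
*/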
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
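      /* rowners[] holds prefix sums of the local row counts gathered above, so process i owns global
         rows rowners[i] .. rowners[i+1]-1; in this loop rank 0 sends each other process the lengths
         of exactly those rows, which that process uses below to compute its preallocation. */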
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
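  /* Layout note (added for clarity): each row's values travel as one contiguous block ordered
     [off-diagonal entries left of the diagonal block | diagonal-block entries | off-diagonal entries
     to the right], and ld[i] records how many entries of row i lie left of the diagonal block.
     For example (illustrative values), with rstart=4 and rend=8 a row with global columns {1,3,4,6,9}
     has ld = 2: columns {1,3} belong to the off-diagonal part B, {4,6} to the diagonal part A, and
     {9} again to B. The copy loops below use ld[] to split the incoming stream between Ao and Ad. */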
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order-N integer array) but it is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        inserted = PETSC_TRUE; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
    rp1[_i] = col;
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
          MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not hold and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd   = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
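/*
   Note (added for clarity): MatAssemblyBegin_MPIAIJ above only starts the stash communication for
   entries that were set on a process that does not own their row; the receive loop below picks up
   those messages and applies them through MatSetValues_MPIAIJ before the diagonal (A) and
   off-diagonal (B) blocks are themselves assembled. A typical calling sequence that exercises this
   path (an illustrative sketch, not code from this file; error checking omitted):

     MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES);   (any process may set any global entry)
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);               (off-process entries are applied here)
*/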
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 845 i = j; 846 } 847 } 848 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 849 } 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 852 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 853 if (mat->boundtocpu) { 854 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 855 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 856 } 857 #endif 858 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 859 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 860 861 /* determine if any processor has disassembled, if so we must 862 also disassemble ourself, in order that we may reassemble. */ 863 /* 864 if nonzero structure of submatrix B cannot change then we know that 865 no processor disassembled thus we can skip this stuff 866 */ 867 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 868 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 869 if (mat->was_assembled && !other_disassembled) { 870 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 871 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 872 #endif 873 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 874 } 875 } 876 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 877 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 878 } 879 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 880 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 881 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 882 #endif 883 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 884 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 885 886 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 887 888 aij->rowvalues = 0; 889 890 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 891 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 892 893 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 894 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 895 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 896 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 897 } 898 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 899 mat->offloadmask = PETSC_OFFLOAD_BOTH; 900 #endif 901 PetscFunctionReturn(0); 902 } 903 904 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 905 { 906 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 907 PetscErrorCode ierr; 908 909 PetscFunctionBegin; 910 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 911 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 912 PetscFunctionReturn(0); 913 } 914 915 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 916 { 917 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 918 PetscObjectState sA, sB; 919 PetscInt *lrows; 920 PetscInt r, len; 921 PetscBool cong, lch, gch; 922 PetscErrorCode ierr; 923 924 PetscFunctionBegin; 925 /* get locally owned rows */ 926 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 927 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 928 /* fix right hand side if needed */ 929 if (x && b) { 930 const PetscScalar *xx; 931 PetscScalar *bb; 932 933 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 934 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 935 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 936 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 937 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 938 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 939 } 940 941 sA = mat->A->nonzerostate; 942 sB = mat->B->nonzerostate; 943 944 if (diag != 0.0 && cong) { 945 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 946 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 948 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 949 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 950 PetscInt nnwA, nnwB; 951 PetscBool nnzA, nnzB; 952 953 nnwA = aijA->nonew; 954 nnwB = aijB->nonew; 955 nnzA = aijA->keepnonzeropattern; 956 nnzB = aijB->keepnonzeropattern; 957 if (!nnzA) { 958 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 959 aijA->nonew = 0; 960 } 961 if (!nnzB) { 962 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 963 aijB->nonew = 0; 964 } 965 /* Must zero here before the next loop */ 966 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 967 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 968 for (r = 0; r < len; ++r) { 969 const PetscInt row = lrows[r] + A->rmap->rstart; 970 if (row >= A->cmap->N) continue; 971 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 972 } 973 aijA->nonew = nnwA; 974 aijB->nonew = nnwB; 975 } else { 976 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 977 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 978 } 979 ierr = PetscFree(lrows);CHKERRQ(ierr); 980 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 982 983 /* reduce nonzerostate */ 984 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 985 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 986 if (gch) A->nonzerostate++; 987 PetscFunctionReturn(0); 988 } 989 990 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 991 { 992 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 993 
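/*
   Note (added for clarity): the rows[] argument may name rows owned by other processes. The PetscSF
   built below has one leaf per requested row with its root on the owning process; the PetscSFReduce
   with MPI_LOR marks every locally owned row that any process asked to have zeroed (lrows[] starts
   out at -1), and the compression loop then turns those marks into the list of local row indices
   that the rest of the routine zeroes in both the diagonal and off-diagonal blocks.
*/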
PetscErrorCode ierr; 994 PetscMPIInt n = A->rmap->n; 995 PetscInt i,j,r,m,len = 0; 996 PetscInt *lrows,*owners = A->rmap->range; 997 PetscMPIInt p = 0; 998 PetscSFNode *rrows; 999 PetscSF sf; 1000 const PetscScalar *xx; 1001 PetscScalar *bb,*mask; 1002 Vec xmask,lmask; 1003 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 1004 const PetscInt *aj, *ii,*ridx; 1005 PetscScalar *aa; 1006 1007 PetscFunctionBegin; 1008 /* Create SF where leaves are input rows and roots are owned rows */ 1009 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1010 for (r = 0; r < n; ++r) lrows[r] = -1; 1011 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1012 for (r = 0; r < N; ++r) { 1013 const PetscInt idx = rows[r]; 1014 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1015 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1016 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1017 } 1018 rrows[r].rank = p; 1019 rrows[r].index = rows[r] - owners[p]; 1020 } 1021 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1022 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1023 /* Collect flags for rows to be zeroed */ 1024 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1025 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1026 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1027 /* Compress and put in row numbers */ 1028 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1029 /* zero diagonal part of matrix */ 1030 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1031 /* handle off diagonal part of matrix */ 1032 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1033 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1034 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1035 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1036 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1037 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1040 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1041 PetscBool cong; 1042 1043 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1044 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1045 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1047 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1048 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1049 } 1050 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1051 /* remove zeroed rows of off diagonal matrix */ 1052 ii = aij->i; 1053 for (i=0; i<len; i++) { 1054 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1055 } 1056 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1057 if (aij->compressedrow.use) { 1058 m = aij->compressedrow.nrows; 1059 ii = aij->compressedrow.i; 1060 ridx = aij->compressedrow.rindex; 1061 for (i=0; i<m; i++) { 1062 n = ii[i+1] - ii[i]; 1063 aj = aij->j + ii[i]; 1064 aa = aij->a + ii[i]; 1065 1066 for (j=0; j<n; j++) { 1067 if (PetscAbsScalar(mask[*aj])) { 1068 if (b) bb[*ridx] 
-= *aa*xx[*aj]; 1069 *aa = 0.0; 1070 } 1071 aa++; 1072 aj++; 1073 } 1074 ridx++; 1075 } 1076 } else { /* do not use compressed row format */ 1077 m = l->B->rmap->n; 1078 for (i=0; i<m; i++) { 1079 n = ii[i+1] - ii[i]; 1080 aj = aij->j + ii[i]; 1081 aa = aij->a + ii[i]; 1082 for (j=0; j<n; j++) { 1083 if (PetscAbsScalar(mask[*aj])) { 1084 if (b) bb[i] -= *aa*xx[*aj]; 1085 *aa = 0.0; 1086 } 1087 aa++; 1088 aj++; 1089 } 1090 } 1091 } 1092 if (x && b) { 1093 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1094 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1095 } 1096 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1097 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1098 ierr = PetscFree(lrows);CHKERRQ(ierr); 1099 1100 /* only change matrix nonzero state if pattern was allowed to be changed */ 1101 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1102 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1103 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1104 } 1105 PetscFunctionReturn(0); 1106 } 1107 1108 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 PetscInt nt; 1113 VecScatter Mvctx = a->Mvctx; 1114 1115 PetscFunctionBegin; 1116 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1117 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1118 1119 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1121 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1122 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1123 PetscFunctionReturn(0); 1124 } 1125 1126 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1127 { 1128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1129 PetscErrorCode ierr; 1130 1131 PetscFunctionBegin; 1132 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1137 { 1138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1139 PetscErrorCode ierr; 1140 VecScatter Mvctx = a->Mvctx; 1141 1142 PetscFunctionBegin; 1143 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1144 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1145 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1146 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1147 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1152 { 1153 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1154 PetscErrorCode ierr; 1155 1156 PetscFunctionBegin; 1157 /* do nondiagonal part */ 1158 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1159 /* do local part */ 1160 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1161 /* add partial results together */ 1162 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1163 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1168 { 1169 MPI_Comm comm; 1170 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1171 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1172 IS Me,Notme; 1173 PetscErrorCode ierr; 1174 PetscInt M,N,first,last,*notme,i; 1175 PetscBool lf; 1176 PetscMPIInt size; 1177 1178 PetscFunctionBegin; 1179 /* Easy test: symmetric diagonal block */ 1180 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1181 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1182 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1183 if (!*f) PetscFunctionReturn(0); 1184 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1185 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1186 if (size == 1) PetscFunctionReturn(0); 1187 1188 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1189 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1190 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1191 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1192 for (i=0; i<first; i++) notme[i] = i; 1193 for (i=last; i<M; i++) notme[i-last+first] = i; 1194 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1195 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1196 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1197 Aoff = Aoffs[0]; 1198 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1199 Boff = Boffs[0]; 1200 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1201 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1202 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1203 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1204 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1205 ierr = PetscFree(notme);CHKERRQ(ierr); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1210 { 1211 PetscErrorCode ierr; 1212 1213 PetscFunctionBegin; 1214 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1215 PetscFunctionReturn(0); 1216 } 1217 1218 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1219 { 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 /* do nondiagonal part */ 1225 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1226 /* do local part */ 1227 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1228 /* add partial results together */ 1229 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 /* 1235 This only works correctly for square matrices where the subblock A->A is the 1236 diagonal block 1237 */ 1238 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1239 { 1240 PetscErrorCode ierr; 1241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1242 1243 PetscFunctionBegin; 1244 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1245 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1246 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1251 { 1252 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1253 PetscErrorCode ierr; 1254 1255 PetscFunctionBegin; 1256 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1257 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1264 PetscErrorCode ierr; 1265 1266 PetscFunctionBegin; 1267 #if defined(PETSC_USE_LOG) 1268 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1269 #endif 1270 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1271 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1272 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1273 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1274 #if defined(PETSC_USE_CTABLE) 1275 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1276 #else 1277 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1278 #endif 1279 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1280 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1281 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1282 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1283 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1284 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1285 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1286 1287 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1296 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1297 #if defined(PETSC_HAVE_ELEMENTAL) 1298 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1299 #endif 1300 #if defined(PETSC_HAVE_HYPRE) 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1302 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1303 #endif 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1306 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1307 PetscFunctionReturn(0); 1308 } 1309 1310 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1311 { 1312 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1313 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1314 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1315 const PetscInt *garray = aij->garray; 1316 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1317 PetscInt *rowlens; 1318 PetscInt *colidxs; 1319 PetscScalar *matvals; 1320 PetscErrorCode ierr; 1321 1322 PetscFunctionBegin; 1323 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1324 1325 M = mat->rmap->N; 1326 N = mat->cmap->N; 1327 m = mat->rmap->n; 1328 rs 
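/*
   File-format note (added for clarity, derived from the code below): the binary viewer writes a
   header of four PetscInt values (MAT_FILE_CLASSID, global rows M, global columns N, global nonzero
   count), then every row's length in global row order, then all column indices in global numbering
   (each row's indices emitted in increasing column order by interleaving the off-diagonal block
   around the diagonal block), and finally the nonzero values in the same order.
*/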
= mat->rmap->rstart; 1329 cs = mat->cmap->rstart; 1330 nz = A->nz + B->nz; 1331 1332 /* write matrix header */ 1333 header[0] = MAT_FILE_CLASSID; 1334 header[1] = M; header[2] = N; header[3] = nz; 1335 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1336 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1337 1338 /* fill in and store row lengths */ 1339 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1340 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1341 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1342 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1343 1344 /* fill in and store column indices */ 1345 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1346 for (cnt=0, i=0; i<m; i++) { 1347 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1348 if (garray[B->j[jb]] > cs) break; 1349 colidxs[cnt++] = garray[B->j[jb]]; 1350 } 1351 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1352 colidxs[cnt++] = A->j[ja] + cs; 1353 for (; jb<B->i[i+1]; jb++) 1354 colidxs[cnt++] = garray[B->j[jb]]; 1355 } 1356 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1357 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1358 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1359 1360 /* fill in and store nonzero values */ 1361 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1362 for (cnt=0, i=0; i<m; i++) { 1363 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1364 if (garray[B->j[jb]] > cs) break; 1365 matvals[cnt++] = B->a[jb]; 1366 } 1367 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1368 matvals[cnt++] = A->a[ja]; 1369 for (; jb<B->i[i+1]; jb++) 1370 matvals[cnt++] = B->a[jb]; 1371 } 1372 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1373 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1374 ierr = PetscFree(matvals);CHKERRQ(ierr); 1375 1376 /* write block size option to the viewer's .info file */ 1377 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1378 PetscFunctionReturn(0); 1379 } 1380 1381 #include <petscdraw.h> 1382 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1383 { 1384 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1385 PetscErrorCode ierr; 1386 PetscMPIInt rank = aij->rank,size = aij->size; 1387 PetscBool isdraw,iascii,isbinary; 1388 PetscViewer sviewer; 1389 PetscViewerFormat format; 1390 1391 PetscFunctionBegin; 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1394 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1395 if (iascii) { 1396 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1397 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1398 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1399 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1400 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1401 for (i=0; i<(PetscInt)size; i++) { 1402 nmax = PetscMax(nmax,nz[i]); 1403 nmin = PetscMin(nmin,nz[i]); 1404 navg += nz[i]; 1405 } 1406 ierr = PetscFree(nz);CHKERRQ(ierr); 1407 navg = navg/size; 1408 ierr = 
PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1409 PetscFunctionReturn(0); 1410 } 1411 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1412 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1413 MatInfo info; 1414 PetscBool inodes; 1415 1416 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1417 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1418 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1419 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1420 if (!inodes) { 1421 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1422 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1423 } else { 1424 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1425 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1426 } 1427 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1428 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1429 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1430 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1431 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1433 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1434 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1435 PetscFunctionReturn(0); 1436 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1437 PetscInt inodecount,inodelimit,*inodes; 1438 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1439 if (inodes) { 1440 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1441 } else { 1442 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1443 } 1444 PetscFunctionReturn(0); 1445 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1446 PetscFunctionReturn(0); 1447 } 1448 } else if (isbinary) { 1449 if (size == 1) { 1450 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1451 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1452 } else { 1453 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1454 } 1455 PetscFunctionReturn(0); 1456 } else if (iascii && size == 1) { 1457 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1458 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1459 PetscFunctionReturn(0); 1460 } else if (isdraw) { 1461 PetscDraw draw; 1462 PetscBool isnull; 1463 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1464 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1465 if (isnull) PetscFunctionReturn(0); 1466 } 1467 1468 { /* assemble the entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = 0; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 
/* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 
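/* Options that only affect the sequential blocks are forwarded to both the on-diagonal (A) and off-diagonal (B) parts below; inherently parallel options such as MAT_IGNORE_OFF_PROC_ENTRIES are recorded directly in the Mat_MPIAIJ context. */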
1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = 0; pvB = 0;} 1874 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = 0; 1912 if (v) *v = 0; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 
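/* accumulate |a_ij| into a per-global-column sum: diagonal-block entries are offset by cstart, off-diagonal entries are mapped through garray; the sums are then reduced across all ranks and the largest column sum taken */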
1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2253 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2254 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2256 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2257 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2258 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2259 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
2261 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2262 } 2263 PetscFunctionReturn(0); 2264 } 2265 2266 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2267 2268 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2269 { 2270 #if defined(PETSC_USE_COMPLEX) 2271 PetscErrorCode ierr; 2272 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2273 2274 PetscFunctionBegin; 2275 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2276 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2277 #else 2278 PetscFunctionBegin; 2279 #endif 2280 PetscFunctionReturn(0); 2281 } 2282 2283 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2284 { 2285 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2286 PetscErrorCode ierr; 2287 2288 PetscFunctionBegin; 2289 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2290 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2295 { 2296 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2297 PetscErrorCode ierr; 2298 2299 PetscFunctionBegin; 2300 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2301 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2302 PetscFunctionReturn(0); 2303 } 2304 2305 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2306 { 2307 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2308 PetscErrorCode ierr; 2309 PetscInt i,*idxb = 0; 2310 PetscScalar *va,*vb; 2311 Vec vtmp; 2312 2313 PetscFunctionBegin; 2314 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2315 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2316 if (idx) { 2317 for (i=0; i<A->rmap->n; i++) { 2318 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2319 } 2320 } 2321 2322 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2323 if (idx) { 2324 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2325 } 2326 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2327 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2328 2329 for (i=0; i<A->rmap->n; i++) { 2330 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2331 va[i] = vb[i]; 2332 if (idx) idx[i] = a->garray[idxb[i]]; 2333 } 2334 } 2335 2336 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2337 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2338 ierr = PetscFree(idxb);CHKERRQ(ierr); 2339 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2340 PetscFunctionReturn(0); 2341 } 2342 2343 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2344 { 2345 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2346 PetscErrorCode ierr; 2347 PetscInt i,*idxb = 0; 2348 PetscScalar *va,*vb; 2349 Vec vtmp; 2350 2351 PetscFunctionBegin; 2352 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2353 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2354 if (idx) { 2355 for (i=0; i<A->rmap->n; i++) { 2356 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2357 } 2358 } 2359 2360 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2361 if (idx) { 2362 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2363 } 2364 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2365 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2366 2367 for (i=0; i<A->rmap->n; i++) { 2368 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2369 va[i] = vb[i]; 2370 if (idx) idx[i] = a->garray[idxb[i]]; 2371 } 2372 } 2373 2374 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2375 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2376 ierr = PetscFree(idxb);CHKERRQ(ierr); 2377 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2378 PetscFunctionReturn(0); 2379 } 2380 2381 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2382 { 2383 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2384 PetscInt n = A->rmap->n; 2385 PetscInt cstart = A->cmap->rstart; 2386 PetscInt *cmap = mat->garray; 2387 PetscInt *diagIdx, *offdiagIdx; 2388 Vec diagV, offdiagV; 2389 PetscScalar *a, *diagA, *offdiagA; 2390 PetscInt r; 2391 PetscErrorCode ierr; 2392 2393 PetscFunctionBegin; 2394 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2395 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2396 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2397 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2398 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2399 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2400 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2401 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2402 for (r = 0; r < n; ++r) { 2403 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2404 a[r] = diagA[r]; 2405 idx[r] = cstart + diagIdx[r]; 2406 } else { 2407 a[r] = offdiagA[r]; 2408 idx[r] = cmap[offdiagIdx[r]]; 2409 } 2410 } 2411 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2412 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2413 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2414 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2415 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2416 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2421 { 2422 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2423 PetscInt n = A->rmap->n; 2424 PetscInt cstart = A->cmap->rstart; 2425 PetscInt *cmap = mat->garray; 2426 PetscInt *diagIdx, *offdiagIdx; 2427 Vec diagV, offdiagV; 2428 PetscScalar *a, *diagA, *offdiagA; 2429 PetscInt r; 2430 PetscErrorCode ierr; 2431 2432 PetscFunctionBegin; 2433 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2434 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2435 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2436 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2437 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2438 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2439 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2440 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2441 for (r = 0; r < n; ++r) { 2442 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2443 a[r] = diagA[r]; 2444 idx[r] = cstart + diagIdx[r]; 2445 } else { 2446 a[r] = offdiagA[r]; 2447 idx[r] = cmap[offdiagIdx[r]]; 2448 } 2449 } 2450 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2451 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2452 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2453 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2454 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2455 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2456 PetscFunctionReturn(0); 2457 } 2458 2459 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2460 { 2461 PetscErrorCode ierr; 2462 Mat *dummy; 2463 2464 PetscFunctionBegin; 2465 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2466 *newmat = *dummy; 2467 ierr = PetscFree(dummy);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2472 { 2473 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2474 PetscErrorCode ierr; 2475 2476 PetscFunctionBegin; 2477 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2478 A->factorerrortype = a->A->factorerrortype; 2479 PetscFunctionReturn(0); 2480 } 2481 2482 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2483 { 2484 PetscErrorCode ierr; 2485 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2486 2487 PetscFunctionBegin; 2488 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2489 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2490 if (x->assembled) { 2491 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2492 } else { 2493 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2494 } 2495 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 PetscFunctionReturn(0); 2498 } 2499 2500 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2501 { 2502 PetscFunctionBegin; 2503 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2504 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2505 PetscFunctionReturn(0); 2506 } 2507 2508 /*@ 2509 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2510 2511 Collective on Mat 2512 2513 Input Parameters: 2514 + A - the matrix 2515 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2516 2517 Level: advanced 2518 2519 @*/ 2520 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2521 { 2522 PetscErrorCode ierr; 2523 2524 PetscFunctionBegin; 2525 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2526 PetscFunctionReturn(0); 2527 } 2528 2529 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2530 { 2531 PetscErrorCode ierr; 2532 PetscBool sc = PETSC_FALSE,flg; 2533 2534 PetscFunctionBegin; 2535 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2536 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2537 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2538 if (flg) { 2539 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2540 } 2541 ierr = PetscOptionsTail();CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2546 { 2547 PetscErrorCode ierr; 2548 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2549 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2550 2551 PetscFunctionBegin; 2552 if (!Y->preallocated) { 2553 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2554 } else if (!aij->nz) { 2555 PetscInt nonew = aij->nonew; 2556 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2557 aij->nonew = nonew; 2558 } 2559 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2560 PetscFunctionReturn(0); 2561 } 2562 2563 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2564 { 2565 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2566 PetscErrorCode ierr; 2567 2568 PetscFunctionBegin; 2569 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2570 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2571 if (d) { 2572 PetscInt rstart; 
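/* MatMissingDiagonal() on the diagonal block returns a local row index; shift it into the global row numbering below */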
2573 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2574 *d += rstart; 2575 2576 } 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2581 { 2582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2587 PetscFunctionReturn(0); 2588 } 2589 2590 /* -------------------------------------------------------------------*/ 2591 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2592 MatGetRow_MPIAIJ, 2593 MatRestoreRow_MPIAIJ, 2594 MatMult_MPIAIJ, 2595 /* 4*/ MatMultAdd_MPIAIJ, 2596 MatMultTranspose_MPIAIJ, 2597 MatMultTransposeAdd_MPIAIJ, 2598 0, 2599 0, 2600 0, 2601 /*10*/ 0, 2602 0, 2603 0, 2604 MatSOR_MPIAIJ, 2605 MatTranspose_MPIAIJ, 2606 /*15*/ MatGetInfo_MPIAIJ, 2607 MatEqual_MPIAIJ, 2608 MatGetDiagonal_MPIAIJ, 2609 MatDiagonalScale_MPIAIJ, 2610 MatNorm_MPIAIJ, 2611 /*20*/ MatAssemblyBegin_MPIAIJ, 2612 MatAssemblyEnd_MPIAIJ, 2613 MatSetOption_MPIAIJ, 2614 MatZeroEntries_MPIAIJ, 2615 /*24*/ MatZeroRows_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*29*/ MatSetUp_MPIAIJ, 2621 0, 2622 0, 2623 MatGetDiagonalBlock_MPIAIJ, 2624 0, 2625 /*34*/ MatDuplicate_MPIAIJ, 2626 0, 2627 0, 2628 0, 2629 0, 2630 /*39*/ MatAXPY_MPIAIJ, 2631 MatCreateSubMatrices_MPIAIJ, 2632 MatIncreaseOverlap_MPIAIJ, 2633 MatGetValues_MPIAIJ, 2634 MatCopy_MPIAIJ, 2635 /*44*/ MatGetRowMax_MPIAIJ, 2636 MatScale_MPIAIJ, 2637 MatShift_MPIAIJ, 2638 MatDiagonalSet_MPIAIJ, 2639 MatZeroRowsColumns_MPIAIJ, 2640 /*49*/ MatSetRandom_MPIAIJ, 2641 0, 2642 0, 2643 0, 2644 0, 2645 /*54*/ MatFDColoringCreate_MPIXAIJ, 2646 0, 2647 MatSetUnfactored_MPIAIJ, 2648 MatPermute_MPIAIJ, 2649 0, 2650 /*59*/ MatCreateSubMatrix_MPIAIJ, 2651 MatDestroy_MPIAIJ, 2652 MatView_MPIAIJ, 2653 0, 2654 0, 2655 /*64*/ 0, 2656 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2657 0, 2658 0, 2659 0, 2660 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2661 MatGetRowMinAbs_MPIAIJ, 2662 0, 2663 0, 2664 0, 2665 0, 2666 /*75*/ MatFDColoringApply_AIJ, 2667 MatSetFromOptions_MPIAIJ, 2668 0, 2669 0, 2670 MatFindZeroDiagonals_MPIAIJ, 2671 /*80*/ 0, 2672 0, 2673 0, 2674 /*83*/ MatLoad_MPIAIJ, 2675 MatIsSymmetric_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*89*/ 0, 2681 0, 2682 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2683 0, 2684 0, 2685 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2686 0, 2687 0, 2688 0, 2689 MatBindToCPU_MPIAIJ, 2690 /*99*/ MatProductSetFromOptions_MPIAIJ, 2691 0, 2692 0, 2693 MatConjugate_MPIAIJ, 2694 0, 2695 /*104*/MatSetValuesRow_MPIAIJ, 2696 MatRealPart_MPIAIJ, 2697 MatImaginaryPart_MPIAIJ, 2698 0, 2699 0, 2700 /*109*/0, 2701 0, 2702 MatGetRowMin_MPIAIJ, 2703 0, 2704 MatMissingDiagonal_MPIAIJ, 2705 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2706 0, 2707 MatGetGhosts_MPIAIJ, 2708 0, 2709 0, 2710 /*119*/0, 2711 0, 2712 0, 2713 0, 2714 MatGetMultiProcBlock_MPIAIJ, 2715 /*124*/MatFindNonzeroRows_MPIAIJ, 2716 MatGetColumnNorms_MPIAIJ, 2717 MatInvertBlockDiagonal_MPIAIJ, 2718 MatInvertVariableBlockDiagonal_MPIAIJ, 2719 MatCreateSubMatricesMPI_MPIAIJ, 2720 /*129*/0, 2721 0, 2722 0, 2723 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2724 0, 2725 /*134*/0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /*139*/MatSetBlockSizes_MPIAIJ, 2731 0, 2732 0, 2733 MatFDColoringSetUp_MPIXAIJ, 2734 MatFindOffBlockDiagonalEntries_MPIAIJ, 2735 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2736 /*145*/0, 2737 0, 2738 0 2739 }; 2740 2741 /* 
----------------------------------------------------------------------------------------*/ 2742 2743 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2744 { 2745 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2750 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2751 PetscFunctionReturn(0); 2752 } 2753 2754 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2755 { 2756 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2757 PetscErrorCode ierr; 2758 2759 PetscFunctionBegin; 2760 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2761 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2762 PetscFunctionReturn(0); 2763 } 2764 2765 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2766 { 2767 Mat_MPIAIJ *b; 2768 PetscErrorCode ierr; 2769 PetscMPIInt size; 2770 2771 PetscFunctionBegin; 2772 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2773 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2774 b = (Mat_MPIAIJ*)B->data; 2775 2776 #if defined(PETSC_USE_CTABLE) 2777 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2778 #else 2779 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2780 #endif 2781 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2782 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2783 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2784 2785 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2786 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2787 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2788 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2789 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2790 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2791 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2792 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2793 2794 if (!B->preallocated) { 2795 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2796 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2797 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2798 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2799 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2800 } 2801 2802 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2803 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2804 B->preallocated = PETSC_TRUE; 2805 B->was_assembled = PETSC_FALSE; 2806 B->assembled = PETSC_FALSE; 2807 PetscFunctionReturn(0); 2808 } 2809 2810 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2811 { 2812 Mat_MPIAIJ *b; 2813 PetscErrorCode ierr; 2814 2815 PetscFunctionBegin; 2816 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2817 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2818 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2819 b = (Mat_MPIAIJ*)B->data; 2820 2821 #if defined(PETSC_USE_CTABLE) 2822 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2823 #else 2824 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2825 #endif 2826 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2827 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2828 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2829 2830 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2831 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2832 B->preallocated = PETSC_TRUE; 2833 B->was_assembled = PETSC_FALSE; 2834 B->assembled = PETSC_FALSE; 2835 PetscFunctionReturn(0); 2836 } 2837 2838 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2839 { 2840 Mat mat; 2841 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2842 PetscErrorCode ierr; 2843 2844 PetscFunctionBegin; 2845 *newmat = 0; 2846 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2847 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2848 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2849 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2850 a = (Mat_MPIAIJ*)mat->data; 2851 2852 mat->factortype = matin->factortype; 2853 mat->assembled = matin->assembled; 2854 mat->insertmode = NOT_SET_VALUES; 2855 mat->preallocated = matin->preallocated; 2856 2857 a->size = oldmat->size; 2858 a->rank = oldmat->rank; 2859 a->donotstash = oldmat->donotstash; 2860 a->roworiented = oldmat->roworiented; 2861 a->rowindices = NULL; 2862 a->rowvalues = NULL; 2863 a->getrowactive = PETSC_FALSE; 2864 2865 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2866 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2867 2868 if (oldmat->colmap) { 2869 #if defined(PETSC_USE_CTABLE) 2870 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2871 #else 2872 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2873 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2874 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2875 #endif 2876 } else a->colmap = NULL; 2877 if (oldmat->garray) { 2878 PetscInt len; 2879 len = oldmat->B->cmap->n; 2880 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2881 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2882 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2883 } else a->garray = NULL; 2884 2885 /* It may happen MatDuplicate is called with a non-assembled matrix 2886 In fact, MatDuplicate only requires the matrix to be preallocated 2887 This may happen inside a DMCreateMatrix_Shell */ 2888 if (oldmat->lvec) { 2889 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2891 } 2892 if (oldmat->Mvctx) { 2893 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2895 } 2896 if (oldmat->Mvctx_mpi1) { 2897 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2898 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2899 } 2900 2901 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2902 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2903 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2904 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2905 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2906 *newmat = mat; 2907 PetscFunctionReturn(0); 2908 } 2909 2910 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2911 { 2912 PetscBool isbinary, ishdf5; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2917 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2918 /* force binary viewer to load .info file if it has not yet done so */ 2919 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2920 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2921 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2922 if (isbinary) { 2923 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2924 } else if (ishdf5) { 2925 #if defined(PETSC_HAVE_HDF5) 2926 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2927 #else 2928 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2929 #endif 2930 } else { 2931 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2932 } 2933 PetscFunctionReturn(0); 2934 } 2935 2936 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2937 { 2938 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2939 PetscInt *rowidxs,*colidxs; 2940 PetscScalar *matvals; 2941 PetscErrorCode ierr; 2942 2943 PetscFunctionBegin; 2944 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2945 2946 /* read in matrix header */ 2947 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2948 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2949 M = header[1]; N = header[2]; nz = header[3]; 2950 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2951 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2952 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2953 2954 /* set block sizes from the viewer's .info file */ 2955 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2956 /* set global sizes if not set already */ 2957 if (mat->rmap->N < 0) mat->rmap->N = M; 2958 if (mat->cmap->N < 0) mat->cmap->N = N; 2959 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2960 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2961 2962 /* check if the matrix sizes are correct */ 2963 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2964 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2965 2966 /* read in row lengths and build row indices */ 2967 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2968 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2969 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2970 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2971 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2972 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2973 /* read in column indices and matrix values */ 2974 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2975 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2976 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2977 /* store matrix indices and values */ 2978 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2979 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2980 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2981 PetscFunctionReturn(0); 2982 } 2983 2984 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2985 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2986 { 2987 PetscErrorCode ierr; 2988 IS iscol_local; 2989 PetscBool isstride; 2990 PetscMPIInt lisstride=0,gisstride; 2991 2992 PetscFunctionBegin; 2993 /* check if we are grabbing all columns*/ 2994 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2995 2996 if (isstride) { 2997 PetscInt start,len,mstart,mlen; 2998 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2999 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3000 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3001 if (mstart == start && mlen-mstart == len) lisstride = 1; 3002 } 3003 3004 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3005 if (gisstride) { 3006 PetscInt N; 3007 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3008 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3009 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3010 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3011 } else { 3012 PetscInt cbs; 3013 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3014 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3015 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3016 } 3017 3018 *isseq = iscol_local; 3019 PetscFunctionReturn(0); 3020 } 3021 3022 /* 3023 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3024 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3025 3026 Input Parameters: 3027 mat - matrix 3028 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3029 i.e., mat->rstart <= isrow[i] < mat->rend 3030 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3031 i.e., mat->cstart <= iscol[i] < mat->cend 3032 Output Parameter: 3033 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3034 iscol_o - sequential column index set for retrieving mat->B 3035 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3036 */ 3037 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3038 { 3039 PetscErrorCode ierr; 3040 Vec x,cmap; 3041 const PetscInt *is_idx; 3042 PetscScalar *xarray,*cmaparray; 3043 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3044 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3045 Mat B=a->B; 3046 Vec lvec=a->lvec,lcmap; 3047 PetscInt i,cstart,cend,Bn=B->cmap->N; 3048 MPI_Comm comm; 3049 VecScatter Mvctx=a->Mvctx; 3050 3051 PetscFunctionBegin; 3052 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3053 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3054 3055 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3056 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3057 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3058 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3059 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3060 3061 /* Get start indices */ 3062 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3063 isstart -= ncols; 3064 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3065 3066 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3067 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3068 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3069 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3070 for (i=0; i<ncols; i++) { 3071 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3072 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3073 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3074 } 3075 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3076 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3077 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3078 3079 /* Get iscol_d */ 3080 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3081 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3082 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3083 3084 /* Get isrow_d */ 3085 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3086 rstart = mat->rmap->rstart; 3087 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3088 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3089 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3090 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3091 3092 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3093 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3094 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3095 3096 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3097 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3099 3100 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3101 3102 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3104 3105 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3106 /* off-process column indices */ 3107 count = 0; 3108 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3109 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3110 3111 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3112 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3113 for (i=0; i<Bn; i++) { 3114 if (PetscRealPart(xarray[i]) > -1.0) { 3115 idx[count] = i; /* local column index in off-diagonal part B */ 3116 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3117 count++; 3118 } 3119 } 3120 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3121 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3122 3123 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3124 /* cannot ensure iscol_o has same blocksize as iscol! 
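      (only the off-process columns actually selected by iscol survive in iscol_o, so the
      remaining indices need not line up with the original block boundaries)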
*/ 3125 3126 ierr = PetscFree(idx);CHKERRQ(ierr); 3127 *garray = cmap1; 3128 3129 ierr = VecDestroy(&x);CHKERRQ(ierr); 3130 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3131 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3132 PetscFunctionReturn(0); 3133 } 3134 3135 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3136 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3137 { 3138 PetscErrorCode ierr; 3139 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3140 Mat M = NULL; 3141 MPI_Comm comm; 3142 IS iscol_d,isrow_d,iscol_o; 3143 Mat Asub = NULL,Bsub = NULL; 3144 PetscInt n; 3145 3146 PetscFunctionBegin; 3147 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3148 3149 if (call == MAT_REUSE_MATRIX) { 3150 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3151 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3152 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3153 3154 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3155 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3156 3157 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3158 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3159 3160 /* Update diagonal and off-diagonal portions of submat */ 3161 asub = (Mat_MPIAIJ*)(*submat)->data; 3162 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3163 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3164 if (n) { 3165 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3166 } 3167 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3169 3170 } else { /* call == MAT_INITIAL_MATRIX) */ 3171 const PetscInt *garray; 3172 PetscInt BsubN; 3173 3174 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
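       isrow_d/iscol_d are sequential index sets in local numbering used to extract the diagonal
       block a->A, iscol_o extracts the off-diagonal block a->B, and garray maps each entry of
       iscol_o to its global position within iscol; see ISGetSeqIS_SameColDist_Private() above.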
*/ 3175 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3176 3177 /* Create local submatrices Asub and Bsub */ 3178 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3179 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3180 3181 /* Create submatrix M */ 3182 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3183 3184 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3185 asub = (Mat_MPIAIJ*)M->data; 3186 3187 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3188 n = asub->B->cmap->N; 3189 if (BsubN > n) { 3190 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3191 const PetscInt *idx; 3192 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3193 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3194 3195 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3196 j = 0; 3197 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3198 for (i=0; i<n; i++) { 3199 if (j >= BsubN) break; 3200 while (subgarray[i] > garray[j]) j++; 3201 3202 if (subgarray[i] == garray[j]) { 3203 idx_new[i] = idx[j++]; 3204 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3205 } 3206 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3207 3208 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3209 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3210 3211 } else if (BsubN < n) { 3212 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3213 } 3214 3215 ierr = PetscFree(garray);CHKERRQ(ierr); 3216 *submat = M; 3217 3218 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3219 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3220 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3221 3222 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3223 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3224 3225 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3226 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3227 } 3228 PetscFunctionReturn(0); 3229 } 3230 3231 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3232 { 3233 PetscErrorCode ierr; 3234 IS iscol_local=NULL,isrow_d; 3235 PetscInt csize; 3236 PetscInt n,i,j,start,end; 3237 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3238 MPI_Comm comm; 3239 3240 PetscFunctionBegin; 3241 /* If isrow has same processor distribution as mat, 3242 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3243 if (call == MAT_REUSE_MATRIX) { 3244 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3245 if (isrow_d) { 3246 sameRowDist = PETSC_TRUE; 3247 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3248 } else { 3249 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3250 if (iscol_local) { 3251 sameRowDist = PETSC_TRUE; 3252 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3253 } 3254 } 3255 } else { 3256 /* Check if isrow has same processor distribution as mat */ 3257 sameDist[0] = 
PETSC_FALSE; 3258 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3259 if (!n) { 3260 sameDist[0] = PETSC_TRUE; 3261 } else { 3262 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3263 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3264 if (i >= start && j < end) { 3265 sameDist[0] = PETSC_TRUE; 3266 } 3267 } 3268 3269 /* Check if iscol has same processor distribution as mat */ 3270 sameDist[1] = PETSC_FALSE; 3271 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3272 if (!n) { 3273 sameDist[1] = PETSC_TRUE; 3274 } else { 3275 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3276 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3277 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3278 } 3279 3280 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3281 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3282 sameRowDist = tsameDist[0]; 3283 } 3284 3285 if (sameRowDist) { 3286 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3287 /* isrow and iscol have same processor distribution as mat */ 3288 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3289 PetscFunctionReturn(0); 3290 } else { /* sameRowDist */ 3291 /* isrow has same processor distribution as mat */ 3292 if (call == MAT_INITIAL_MATRIX) { 3293 PetscBool sorted; 3294 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3295 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3296 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3297 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3298 3299 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3300 if (sorted) { 3301 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3302 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3303 PetscFunctionReturn(0); 3304 } 3305 } else { /* call == MAT_REUSE_MATRIX */ 3306 IS iscol_sub; 3307 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3308 if (iscol_sub) { 3309 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 } 3313 } 3314 } 3315 3316 /* General case: iscol -> iscol_local which has global size of iscol */ 3317 if (call == MAT_REUSE_MATRIX) { 3318 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3319 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3320 } else { 3321 if (!iscol_local) { 3322 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3323 } 3324 } 3325 3326 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3327 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3328 3329 if (call == MAT_INITIAL_MATRIX) { 3330 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3331 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3332 } 3333 PetscFunctionReturn(0); 3334 } 3335 3336 /*@C 3337 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3338 and "off-diagonal" part of the matrix in CSR format. 3339 3340 Collective 3341 3342 Input Parameters: 3343 + comm - MPI communicator 3344 . 
A - "diagonal" portion of matrix 3345 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3346 - garray - global index of B columns 3347 3348 Output Parameter: 3349 . mat - the matrix, with input A as its local diagonal matrix 3350 Level: advanced 3351 3352 Notes: 3353 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3354 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3355 3356 .seealso: MatCreateMPIAIJWithSplitArrays() 3357 @*/ 3358 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3359 { 3360 PetscErrorCode ierr; 3361 Mat_MPIAIJ *maij; 3362 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3363 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3364 PetscScalar *oa=b->a; 3365 Mat Bnew; 3366 PetscInt m,n,N; 3367 3368 PetscFunctionBegin; 3369 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3370 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3371 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3372 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3373 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3374 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3375 3376 /* Get global columns of mat */ 3377 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3378 3379 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3380 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3381 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3382 maij = (Mat_MPIAIJ*)(*mat)->data; 3383 3384 (*mat)->preallocated = PETSC_TRUE; 3385 3386 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3387 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3388 3389 /* Set A as diagonal portion of *mat */ 3390 maij->A = A; 3391 3392 nz = oi[m]; 3393 for (i=0; i<nz; i++) { 3394 col = oj[i]; 3395 oj[i] = garray[col]; 3396 } 3397 3398 /* Set Bnew as off-diagonal portion of *mat */ 3399 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3400 bnew = (Mat_SeqAIJ*)Bnew->data; 3401 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3402 maij->B = Bnew; 3403 3404 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3405 3406 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3407 b->free_a = PETSC_FALSE; 3408 b->free_ij = PETSC_FALSE; 3409 ierr = MatDestroy(&B);CHKERRQ(ierr); 3410 3411 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3412 bnew->free_a = PETSC_TRUE; 3413 bnew->free_ij = PETSC_TRUE; 3414 3415 /* condense columns of maij->B */ 3416 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3417 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3419 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3420 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3421 PetscFunctionReturn(0); 3422 } 3423 3424 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3425 
3426 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3427 { 3428 PetscErrorCode ierr; 3429 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3430 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3431 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3432 Mat M,Msub,B=a->B; 3433 MatScalar *aa; 3434 Mat_SeqAIJ *aij; 3435 PetscInt *garray = a->garray,*colsub,Ncols; 3436 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3437 IS iscol_sub,iscmap; 3438 const PetscInt *is_idx,*cmap; 3439 PetscBool allcolumns=PETSC_FALSE; 3440 MPI_Comm comm; 3441 3442 PetscFunctionBegin; 3443 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3444 3445 if (call == MAT_REUSE_MATRIX) { 3446 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3447 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3448 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3449 3450 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3451 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3452 3453 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3454 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3455 3456 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3457 3458 } else { /* call == MAT_INITIAL_MATRIX) */ 3459 PetscBool flg; 3460 3461 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3462 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3463 3464 /* (1) iscol -> nonscalable iscol_local */ 3465 /* Check for special case: each processor gets entire matrix columns */ 3466 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3467 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3468 if (allcolumns) { 3469 iscol_sub = iscol_local; 3470 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3471 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3472 3473 } else { 3474 /* (2) iscol_local -> iscol_sub and iscmap. 
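       iscol_sub keeps the global column indices selected by iscol_local that are either owned
       locally (diagonal part) or present in garray (off-diagonal part), while iscmap records,
       for each kept column, its position in the submatrix.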
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3475 PetscInt *idx,*cmap1,k; 3476 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3478 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3479 count = 0; 3480 k = 0; 3481 for (i=0; i<Ncols; i++) { 3482 j = is_idx[i]; 3483 if (j >= cstart && j < cend) { 3484 /* diagonal part of mat */ 3485 idx[count] = j; 3486 cmap1[count++] = i; /* column index in submat */ 3487 } else if (Bn) { 3488 /* off-diagonal part of mat */ 3489 if (j == garray[k]) { 3490 idx[count] = j; 3491 cmap1[count++] = i; /* column index in submat */ 3492 } else if (j > garray[k]) { 3493 while (j > garray[k] && k < Bn-1) k++; 3494 if (j == garray[k]) { 3495 idx[count] = j; 3496 cmap1[count++] = i; /* column index in submat */ 3497 } 3498 } 3499 } 3500 } 3501 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3502 3503 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3504 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3505 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3506 3507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3508 } 3509 3510 /* (3) Create sequential Msub */ 3511 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3512 } 3513 3514 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3515 aij = (Mat_SeqAIJ*)(Msub)->data; 3516 ii = aij->i; 3517 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3518 3519 /* 3520 m - number of local rows 3521 Ncols - number of columns (same on all processors) 3522 rstart - first row in new global matrix generated 3523 */ 3524 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3525 3526 if (call == MAT_INITIAL_MATRIX) { 3527 /* (4) Create parallel newmat */ 3528 PetscMPIInt rank,size; 3529 PetscInt csize; 3530 3531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3532 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3533 3534 /* 3535 Determine the number of non-zeros in the diagonal and off-diagonal 3536 portions of the matrix in order to do correct preallocation 3537 */ 3538 3539 /* first get start and end of "diagonal" columns */ 3540 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3541 if (csize == PETSC_DECIDE) { 3542 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3543 if (mglobal == Ncols) { /* square matrix */ 3544 nlocal = m; 3545 } else { 3546 nlocal = Ncols/size + ((Ncols % size) > rank); 3547 } 3548 } else { 3549 nlocal = csize; 3550 } 3551 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3552 rstart = rend - nlocal; 3553 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3554 3555 /* next, compute all the lengths */ 3556 jj = aij->j; 3557 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3558 olens = dlens + m; 3559 for (i=0; i<m; i++) { 3560 jend = ii[i+1] - ii[i]; 3561 olen = 0; 3562 dlen = 0; 3563 for (j=0; j<jend; j++) { 3564 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3565 else dlen++; 3566 jj++; 3567 } 3568 olens[i] = olen; 3569 dlens[i] = dlen; 3570 } 3571 3572 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3573 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3574 3575 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3576 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
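    /* M takes its block sizes from isrow/iscol and its type from the original matrix;
       the dlens/olens arrays computed above give exact per-row preallocation for its
       diagonal and off-diagonal blocks */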
3577 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3578 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3579 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3580 ierr = PetscFree(dlens);CHKERRQ(ierr); 3581 3582 } else { /* call == MAT_REUSE_MATRIX */ 3583 M = *newmat; 3584 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3585 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3586 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3587 /* 3588 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3589 rather than the slower MatSetValues(). 3590 */ 3591 M->was_assembled = PETSC_TRUE; 3592 M->assembled = PETSC_FALSE; 3593 } 3594 3595 /* (5) Set values of Msub to *newmat */ 3596 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3597 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3598 3599 jj = aij->j; 3600 aa = aij->a; 3601 for (i=0; i<m; i++) { 3602 row = rstart + i; 3603 nz = ii[i+1] - ii[i]; 3604 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3605 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3606 jj += nz; aa += nz; 3607 } 3608 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3609 3610 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 3613 ierr = PetscFree(colsub);CHKERRQ(ierr); 3614 3615 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3616 if (call == MAT_INITIAL_MATRIX) { 3617 *newmat = M; 3618 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3619 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3620 3621 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3622 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3623 3624 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3625 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3626 3627 if (iscol_local) { 3628 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3629 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3630 } 3631 } 3632 PetscFunctionReturn(0); 3633 } 3634 3635 /* 3636 Not great since it makes two copies of the submatrix, first an SeqAIJ 3637 in local and then by concatenating the local matrices the end result. 3638 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3639 3640 Note: This requires a sequential iscol with all indices. 
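    (in MatCreateSubMatrix_MPIAIJ() that sequential iscol is built with ISGetSeqIS_Private(),
    i.e. via ISAllGather(), which is what makes this path non-scalable in the number of columns)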
3641 */ 3642 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3643 { 3644 PetscErrorCode ierr; 3645 PetscMPIInt rank,size; 3646 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3647 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3648 Mat M,Mreuse; 3649 MatScalar *aa,*vwork; 3650 MPI_Comm comm; 3651 Mat_SeqAIJ *aij; 3652 PetscBool colflag,allcolumns=PETSC_FALSE; 3653 3654 PetscFunctionBegin; 3655 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3656 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3658 3659 /* Check for special case: each processor gets entire matrix columns */ 3660 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3661 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3662 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3663 3664 if (call == MAT_REUSE_MATRIX) { 3665 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3666 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3667 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3668 } else { 3669 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3670 } 3671 3672 /* 3673 m - number of local rows 3674 n - number of columns (same on all processors) 3675 rstart - first row in new global matrix generated 3676 */ 3677 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3678 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3679 if (call == MAT_INITIAL_MATRIX) { 3680 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3681 ii = aij->i; 3682 jj = aij->j; 3683 3684 /* 3685 Determine the number of non-zeros in the diagonal and off-diagonal 3686 portions of the matrix in order to do correct preallocation 3687 */ 3688 3689 /* first get start and end of "diagonal" columns */ 3690 if (csize == PETSC_DECIDE) { 3691 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3692 if (mglobal == n) { /* square matrix */ 3693 nlocal = m; 3694 } else { 3695 nlocal = n/size + ((n % size) > rank); 3696 } 3697 } else { 3698 nlocal = csize; 3699 } 3700 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3701 rstart = rend - nlocal; 3702 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3703 3704 /* next, compute all the lengths */ 3705 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3706 olens = dlens + m; 3707 for (i=0; i<m; i++) { 3708 jend = ii[i+1] - ii[i]; 3709 olen = 0; 3710 dlen = 0; 3711 for (j=0; j<jend; j++) { 3712 if (*jj < rstart || *jj >= rend) olen++; 3713 else dlen++; 3714 jj++; 3715 } 3716 olens[i] = olen; 3717 dlens[i] = dlen; 3718 } 3719 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3720 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3721 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3722 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3723 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3724 ierr = PetscFree(dlens);CHKERRQ(ierr); 3725 } else { 3726 PetscInt ml,nl; 3727 3728 M = *newmat; 3729 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3730 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3731 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3732 /* 3733 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3734 rather than the slower MatSetValues(). 3735 */ 3736 M->was_assembled = PETSC_TRUE; 3737 M->assembled = PETSC_FALSE; 3738 } 3739 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3740 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3741 ii = aij->i; 3742 jj = aij->j; 3743 aa = aij->a; 3744 for (i=0; i<m; i++) { 3745 row = rstart + i; 3746 nz = ii[i+1] - ii[i]; 3747 cwork = jj; jj += nz; 3748 vwork = aa; aa += nz; 3749 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3750 } 3751 3752 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 *newmat = M; 3755 3756 /* save submatrix used in processor for next request */ 3757 if (call == MAT_INITIAL_MATRIX) { 3758 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3759 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3760 } 3761 PetscFunctionReturn(0); 3762 } 3763 3764 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3765 { 3766 PetscInt m,cstart, cend,j,nnz,i,d; 3767 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3768 const PetscInt *JJ; 3769 PetscErrorCode ierr; 3770 PetscBool nooffprocentries; 3771 3772 PetscFunctionBegin; 3773 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3774 3775 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3776 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3777 m = B->rmap->n; 3778 cstart = B->cmap->rstart; 3779 cend = B->cmap->rend; 3780 rstart = B->rmap->rstart; 3781 3782 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3783 3784 #if defined(PETSC_USE_DEBUG) 3785 for (i=0; i<m; i++) { 3786 nnz = Ii[i+1]- Ii[i]; 3787 JJ = J + Ii[i]; 3788 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3789 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3790 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3791 } 3792 #endif 3793 3794 for (i=0; i<m; i++) { 3795 nnz = Ii[i+1]- Ii[i]; 3796 JJ = J + Ii[i]; 3797 nnz_max = PetscMax(nnz_max,nnz); 3798 d = 0; 3799 for (j=0; j<nnz; j++) { 3800 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3801 } 3802 d_nnz[i] = d; 3803 o_nnz[i] = nnz - d; 3804 } 3805 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3806 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3807 3808 for (i=0; i<m; i++) { 3809 ii = i + rstart; 3810 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3811 } 3812 nooffprocentries = B->nooffprocentries; 3813 B->nooffprocentries = PETSC_TRUE; 3814 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 B->nooffprocentries = nooffprocentries; 3817 3818 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3819 PetscFunctionReturn(0); 3820 } 3821 3822 /*@ 3823 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3824 (the default parallel PETSc format). 
3825 3826 Collective 3827 3828 Input Parameters: 3829 + B - the matrix 3830 . i - the indices into j for the start of each local row (starts with zero) 3831 . j - the column indices for each local row (starts with zero) 3832 - v - optional values in the matrix 3833 3834 Level: developer 3835 3836 Notes: 3837 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3838 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3839 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3840 3841 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3842 3843 The format which is used for the sparse matrix input, is equivalent to a 3844 row-major ordering.. i.e for the following matrix, the input data expected is 3845 as shown 3846 3847 $ 1 0 0 3848 $ 2 0 3 P0 3849 $ ------- 3850 $ 4 5 6 P1 3851 $ 3852 $ Process0 [P0]: rows_owned=[0,1] 3853 $ i = {0,1,3} [size = nrow+1 = 2+1] 3854 $ j = {0,0,2} [size = 3] 3855 $ v = {1,2,3} [size = 3] 3856 $ 3857 $ Process1 [P1]: rows_owned=[2] 3858 $ i = {0,3} [size = nrow+1 = 1+1] 3859 $ j = {0,1,2} [size = 3] 3860 $ v = {4,5,6} [size = 3] 3861 3862 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3863 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3864 @*/ 3865 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3866 { 3867 PetscErrorCode ierr; 3868 3869 PetscFunctionBegin; 3870 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3871 PetscFunctionReturn(0); 3872 } 3873 3874 /*@C 3875 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3876 (the default parallel PETSc format). For good matrix assembly performance 3877 the user should preallocate the matrix storage by setting the parameters 3878 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3879 performance can be increased by more than a factor of 50. 3880 3881 Collective 3882 3883 Input Parameters: 3884 + B - the matrix 3885 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3886 (same value is used for all local rows) 3887 . d_nnz - array containing the number of nonzeros in the various rows of the 3888 DIAGONAL portion of the local submatrix (possibly different for each row) 3889 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3890 The size of this array is equal to the number of local rows, i.e 'm'. 3891 For matrices that will be factored, you must leave room for (and set) 3892 the diagonal entry even if it is zero. 3893 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3894 submatrix (same value is used for all local rows). 3895 - o_nnz - array containing the number of nonzeros in the various rows of the 3896 OFF-DIAGONAL portion of the local submatrix (possibly different for 3897 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3898 structure. The size of this array is equal to the number 3899 of local rows, i.e 'm'. 
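   A minimal sketch of the usual call sequence (assuming the caller has already computed the
   local sizes m, n and filled the d_nnz/o_nnz arrays):
.vb
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve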

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input is equivalent to a
   row-major ordering,
   i.e., for the following matrix, the input data expected is
   as shown:

$        1 0 0
$        2 0 3         P0
$       -------
$        4 5 6         P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4099 - v - matrix values 4100 4101 Level: intermediate 4102 4103 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4104 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4105 @*/ 4106 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4107 { 4108 PetscErrorCode ierr; 4109 PetscInt cstart,nnz,i,j; 4110 PetscInt *ld; 4111 PetscBool nooffprocentries; 4112 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4113 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4114 PetscScalar *ad = Ad->a, *ao = Ao->a; 4115 const PetscInt *Adi = Ad->i; 4116 PetscInt ldi,Iii,md; 4117 4118 PetscFunctionBegin; 4119 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4120 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4121 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4122 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4123 4124 cstart = mat->cmap->rstart; 4125 if (!Aij->ld) { 4126 /* count number of entries below block diagonal */ 4127 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4128 Aij->ld = ld; 4129 for (i=0; i<m; i++) { 4130 nnz = Ii[i+1]- Ii[i]; 4131 j = 0; 4132 while (J[j] < cstart && j < nnz) {j++;} 4133 J += nnz; 4134 ld[i] = j; 4135 } 4136 } else { 4137 ld = Aij->ld; 4138 } 4139 4140 for (i=0; i<m; i++) { 4141 nnz = Ii[i+1]- Ii[i]; 4142 Iii = Ii[i]; 4143 ldi = ld[i]; 4144 md = Adi[i+1]-Adi[i]; 4145 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4146 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4147 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4148 ad += md; 4149 ao += nnz - md; 4150 } 4151 nooffprocentries = mat->nooffprocentries; 4152 mat->nooffprocentries = PETSC_TRUE; 4153 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4154 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4155 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4156 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4157 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4158 mat->nooffprocentries = nooffprocentries; 4159 PetscFunctionReturn(0); 4160 } 4161 4162 /*@C 4163 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4164 (the default parallel PETSc format). For good matrix assembly performance 4165 the user should preallocate the matrix storage by setting the parameters 4166 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4167 performance can be increased by more than a factor of 50. 4168 4169 Collective 4170 4171 Input Parameters: 4172 + comm - MPI communicator 4173 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4174 This value should be the same as the local size used in creating the 4175 y vector for the matrix-vector product y = Ax. 4176 . n - This value should be the same as the local size used in creating the 4177 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4178 calculated if N is given) For square matrices n is almost always m. 4179 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e., 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
   values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0,n1,n2,... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor, i.e., the diagonal matrix on
   process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitutes the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
   If a matrix of type MPIAIJ is desired for this type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence preallocation is perfect.
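   A minimal sketch of a direct call for the example above (assuming the d_nnz and o_nnz arrays
   have been filled as listed; values are then inserted with MatSetValues() and the matrix is
   assembled):
.vb
      Mat A;
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
      /* ... MatSetValues() ... */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve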
4331 4332 Level: intermediate 4333 4334 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4335 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4336 @*/ 4337 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4338 { 4339 PetscErrorCode ierr; 4340 PetscMPIInt size; 4341 4342 PetscFunctionBegin; 4343 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4344 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4345 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4346 if (size > 1) { 4347 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4348 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4349 } else { 4350 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4351 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4352 } 4353 PetscFunctionReturn(0); 4354 } 4355 4356 /*@C 4357 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4358 4359 Not collective 4360 4361 Input Parameter: 4362 . A - The MPIAIJ matrix 4363 4364 Output Parameters: 4365 + Ad - The local diagonal block as a SeqAIJ matrix 4366 . Ao - The local off-diagonal block as a SeqAIJ matrix 4367 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4368 4369 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4370 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4371 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4372 local column numbers to global column numbers in the original matrix.
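   A minimal usage sketch (added for illustration; not part of the original manual page):

.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;

      MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
      /* colmap[j] gives the global column number of local column j of Ao */
.ve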
4373 4374 Level: intermediate 4375 4376 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAJ, MATSEQAIJ 4377 @*/ 4378 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4379 { 4380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4381 PetscBool flg; 4382 PetscErrorCode ierr; 4383 4384 PetscFunctionBegin; 4385 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4386 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4387 if (Ad) *Ad = a->A; 4388 if (Ao) *Ao = a->B; 4389 if (colmap) *colmap = a->garray; 4390 PetscFunctionReturn(0); 4391 } 4392 4393 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4394 { 4395 PetscErrorCode ierr; 4396 PetscInt m,N,i,rstart,nnz,Ii; 4397 PetscInt *indx; 4398 PetscScalar *values; 4399 4400 PetscFunctionBegin; 4401 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4402 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4403 PetscInt *dnz,*onz,sum,bs,cbs; 4404 4405 if (n == PETSC_DECIDE) { 4406 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4407 } 4408 /* Check sum(n) = N */ 4409 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4410 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4411 4412 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4413 rstart -= m; 4414 4415 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4416 for (i=0; i<m; i++) { 4417 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4418 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4419 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4420 } 4421 4422 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4423 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4424 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4425 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4426 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4427 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4428 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4429 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4430 } 4431 4432 /* numeric phase */ 4433 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4434 for (i=0; i<m; i++) { 4435 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4436 Ii = i + rstart; 4437 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4438 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4439 } 4440 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4442 PetscFunctionReturn(0); 4443 } 4444 4445 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4446 { 4447 PetscErrorCode ierr; 4448 PetscMPIInt rank; 4449 PetscInt m,N,i,rstart,nnz; 4450 size_t len; 4451 const PetscInt *indx; 4452 PetscViewer out; 4453 char *name; 4454 Mat B; 4455 const PetscScalar *values; 4456 4457 PetscFunctionBegin; 4458 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4459 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4460 /* Should this be the type of the diagonal block of A? 
*/ 4461 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4462 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4463 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4464 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4465 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4466 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4467 for (i=0; i<m; i++) { 4468 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4469 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4470 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4471 } 4472 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4474 4475 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4476 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4477 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4478 sprintf(name,"%s.%d",outfile,rank); 4479 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4480 ierr = PetscFree(name);CHKERRQ(ierr); 4481 ierr = MatView(B,out);CHKERRQ(ierr); 4482 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4483 ierr = MatDestroy(&B);CHKERRQ(ierr); 4484 PetscFunctionReturn(0); 4485 } 4486 4487 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4488 { 4489 PetscErrorCode ierr; 4490 Mat_Merge_SeqsToMPI *merge; 4491 PetscContainer container; 4492 4493 PetscFunctionBegin; 4494 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4495 if (container) { 4496 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4508 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4509 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4510 ierr = PetscFree(merge);CHKERRQ(ierr); 4511 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4512 } 4513 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4514 PetscFunctionReturn(0); 4515 } 4516 4517 #include <../src/mat/utils/freespace.h> 4518 #include <petscbt.h> 4519 4520 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4521 { 4522 PetscErrorCode ierr; 4523 MPI_Comm comm; 4524 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4525 PetscMPIInt size,rank,taga,*len_s; 4526 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4527 PetscInt proc,m; 4528 PetscInt **buf_ri,**buf_rj; 4529 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4530 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4531 MPI_Request *s_waits,*r_waits; 4532 MPI_Status *status; 4533 MatScalar *aa=a->a; 4534 MatScalar **abuf_r,*ba_i; 4535 Mat_Merge_SeqsToMPI *merge; 4536 PetscContainer container; 4537 4538 PetscFunctionBegin; 4539 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4540 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4541 4542 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4543 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4544 4545 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4546 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4547 4548 bi = merge->bi; 4549 bj = merge->bj; 4550 buf_ri = merge->buf_ri; 4551 buf_rj = merge->buf_rj; 4552 4553 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4554 owners = merge->rowmap->range; 4555 len_s = merge->len_s; 4556 4557 /* send and recv matrix values */ 4558 /*-----------------------------*/ 4559 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4560 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4561 4562 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4563 for (proc=0,k=0; proc<size; proc++) { 4564 if (!len_s[proc]) continue; 4565 i = owners[proc]; 4566 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4567 k++; 4568 } 4569 4570 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4571 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4572 ierr = PetscFree(status);CHKERRQ(ierr); 4573 4574 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4575 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4576 4577 /* insert mat values of mpimat */ 4578 /*----------------------------*/ 4579 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4580 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4581 4582 for (k=0; k<merge->nrecv; k++) { 4583 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4584 nrows = *(buf_ri_k[k]); 4585 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4586 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4587 } 4588 4589 /* set values of ba */ 4590 m = merge->rowmap->n; 4591 for (i=0; i<m; i++) { 4592 arow = owners[rank] + i; 4593 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4594 bnzi = bi[i+1] - bi[i]; 4595 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4596 4597 /* add local non-zero vals of this proc's seqmat into ba */ 4598 anzi = ai[arow+1] - ai[arow]; 4599 aj = a->j + ai[arow]; 4600 aa = a->a + ai[arow]; 4601 nextaj = 0; 4602 for (j=0; nextaj<anzi; j++) { 4603 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4604 ba_i[j] += aa[nextaj++]; 4605 } 4606 } 4607 4608 /* add received vals into ba */ 4609 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4610 /* i-th row */ 4611 if (i == *nextrow[k]) { 4612 anzi = *(nextai[k]+1) - *nextai[k]; 4613 aj = buf_rj[k] + *(nextai[k]); 4614 aa = abuf_r[k] + *(nextai[k]); 4615 nextaj = 0; 4616 for (j=0; nextaj<anzi; j++) { 4617 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4618 ba_i[j] += aa[nextaj++]; 4619 } 4620 } 4621 nextrow[k]++; nextai[k]++; 4622 } 4623 } 4624 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4625 } 4626 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4628 4629 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4630 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4631 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4632 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4633 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4634 PetscFunctionReturn(0); 4635 } 4636 4637 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4638 { 4639 PetscErrorCode ierr; 4640 Mat B_mpi; 4641 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4642 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4643 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4644 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4645 PetscInt len,proc,*dnz,*onz,bs,cbs; 4646 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4647 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4648 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4649 MPI_Status *status; 4650 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4651 PetscBT lnkbt; 4652 Mat_Merge_SeqsToMPI *merge; 4653 PetscContainer container; 4654 4655 PetscFunctionBegin; 4656 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4657 4658 /* make sure it is a PETSc comm */ 4659 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4660 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4661 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4662 4663 ierr = PetscNew(&merge);CHKERRQ(ierr); 4664 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4665 4666 /* determine row ownership */ 4667 /*---------------------------------------------------------*/ 4668 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4672 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4674 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4675 4676 m = merge->rowmap->n; 4677 owners = merge->rowmap->range; 4678 4679 /* determine the number of messages to send, their lengths */ 4680 /*---------------------------------------------------------*/ 4681 len_s = merge->len_s; 4682 4683 len = 0; /* length of buf_si[] */ 4684 merge->nsend = 0; 4685 for (proc=0; proc<size; proc++) { 4686 len_si[proc] = 0; 4687 if (proc == rank) { 4688 len_s[proc] = 0; 4689 } else { 4690 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4691 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4692 } 4693 if (len_s[proc]) { 4694 merge->nsend++; 4695 nrows = 0; 4696 for (i=owners[proc]; i<owners[proc+1]; i++) { 4697 if (ai[i+1] > ai[i]) nrows++; 4698 } 4699 len_si[proc] = 2*(nrows+1); 4700 len += len_si[proc]; 4701 } 4702 } 4703 4704 /* determine the number and length of messages to receive for ij-structure */ 4705 /*-------------------------------------------------------------------------*/ 4706 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4707 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4708 4709 /* post the Irecv of j-structure */ 4710 /*-------------------------------*/ 4711 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4712 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4713 4714 /* post the Isend of j-structure */ 4715 /*--------------------------------*/ 4716 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4717 4718 for (proc=0, k=0; proc<size; proc++) { 4719 if (!len_s[proc]) continue; 4720 i = owners[proc]; 4721 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4722 k++; 4723 } 4724 4725 /* receives and sends 
of j-structure are complete */ 4726 /*------------------------------------------------*/ 4727 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4728 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4729 4730 /* send and recv i-structure */ 4731 /*---------------------------*/ 4732 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4733 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4734 4735 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4736 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4737 for (proc=0,k=0; proc<size; proc++) { 4738 if (!len_s[proc]) continue; 4739 /* form outgoing message for i-structure: 4740 buf_si[0]: nrows to be sent 4741 [1:nrows]: row index (global) 4742 [nrows+1:2*nrows+1]: i-structure index 4743 */ 4744 /*-------------------------------------------*/ 4745 nrows = len_si[proc]/2 - 1; 4746 buf_si_i = buf_si + nrows+1; 4747 buf_si[0] = nrows; 4748 buf_si_i[0] = 0; 4749 nrows = 0; 4750 for (i=owners[proc]; i<owners[proc+1]; i++) { 4751 anzi = ai[i+1] - ai[i]; 4752 if (anzi) { 4753 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4754 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4755 nrows++; 4756 } 4757 } 4758 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4759 k++; 4760 buf_si += len_si[proc]; 4761 } 4762 4763 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4764 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4765 4766 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4767 for (i=0; i<merge->nrecv; i++) { 4768 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4769 } 4770 4771 ierr = PetscFree(len_si);CHKERRQ(ierr); 4772 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4773 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4774 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4776 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4777 ierr = PetscFree(status);CHKERRQ(ierr); 4778 4779 /* compute a local seq matrix in each processor */ 4780 /*----------------------------------------------*/ 4781 /* allocate bi array and free space for accumulating nonzero column info */ 4782 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4783 bi[0] = 0; 4784 4785 /* create and initialize a linked list */ 4786 nlnk = N+1; 4787 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4788 4789 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4790 len = ai[owners[rank+1]] - ai[owners[rank]]; 4791 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4792 4793 current_space = free_space; 4794 4795 /* determine symbolic info for each local row */ 4796 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4797 4798 for (k=0; k<merge->nrecv; k++) { 4799 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4800 nrows = *buf_ri_k[k]; 4801 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4802 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4803 } 4804 4805 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4806 len = 0; 4807 for (i=0; i<m; i++) { 4808 bnzi = 0; 4809 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4810 arow = owners[rank] + i; 4811 anzi = ai[arow+1] - ai[arow]; 4812 aj = a->j + ai[arow]; 4813 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4814 bnzi += nlnk; 4815 /* add received col data into lnk */ 4816 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4817 if (i == *nextrow[k]) { /* i-th row */ 4818 anzi = *(nextai[k]+1) - *nextai[k]; 4819 aj = buf_rj[k] + *nextai[k]; 4820 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4821 bnzi += nlnk; 4822 nextrow[k]++; nextai[k]++; 4823 } 4824 } 4825 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4826 4827 /* if free space is not available, make more free space */ 4828 if (current_space->local_remaining<bnzi) { 4829 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4830 nspacedouble++; 4831 } 4832 /* copy data into free space, then initialize lnk */ 4833 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4834 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4835 4836 current_space->array += bnzi; 4837 current_space->local_used += bnzi; 4838 current_space->local_remaining -= bnzi; 4839 4840 bi[i+1] = bi[i] + bnzi; 4841 } 4842 4843 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4844 4845 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4846 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4847 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4848 4849 /* create symbolic parallel matrix B_mpi */ 4850 /*---------------------------------------*/ 4851 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4852 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4853 if (n==PETSC_DECIDE) { 4854 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4855 } else { 4856 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4857 } 4858 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4859 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4860 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4861 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4862 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4863 4864 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4865 B_mpi->assembled = PETSC_FALSE; 4866 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4867 merge->bi = bi; 4868 merge->bj = bj; 4869 merge->buf_ri = buf_ri; 4870 merge->buf_rj = buf_rj; 4871 merge->coi = NULL; 4872 merge->coj = NULL; 4873 merge->owners_co = NULL; 4874 4875 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4876 4877 /* attach the supporting struct to B_mpi for reuse */ 4878 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4879 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4880 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4881 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4882 *mpimat = B_mpi; 4883 4884 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4885 PetscFunctionReturn(0); 4886 } 4887 4888 /*@C 4889 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4890 matrices from each processor 4891 4892 Collective 4893 4894 Input Parameters: 4895 + comm - the communicators the parallel matrix will live on 4896 . seqmat - the input sequential matrices 4897 .
m - number of local rows (or PETSC_DECIDE) 4898 . n - number of local columns (or PETSC_DECIDE) 4899 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4900 4901 Output Parameter: 4902 . mpimat - the parallel matrix generated 4903 4904 Level: advanced 4905 4906 Notes: 4907 The dimensions of the sequential matrix in each processor MUST be the same. 4908 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4909 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4910 @*/ 4911 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4912 { 4913 PetscErrorCode ierr; 4914 PetscMPIInt size; 4915 4916 PetscFunctionBegin; 4917 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4918 if (size == 1) { 4919 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4920 if (scall == MAT_INITIAL_MATRIX) { 4921 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4922 } else { 4923 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4924 } 4925 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4926 PetscFunctionReturn(0); 4927 } 4928 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4929 if (scall == MAT_INITIAL_MATRIX) { 4930 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4931 } 4932 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4933 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4934 PetscFunctionReturn(0); 4935 } 4936 4937 /*@ 4938 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4939 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4940 with MatGetSize() 4941 4942 Not Collective 4943 4944 Input Parameters: 4945 + A - the matrix 4946 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4947 4948 Output Parameter: 4949 . A_loc - the local sequential matrix generated 4950 4951 Level: developer 4952 4953 Notes: 4954 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4955 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4956 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4957 modify the values of the returned A_loc. 
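   A typical call sequence (an illustrative sketch, not from the original manual page) creates
   the local matrix once and later refreshes only its values:

.vb
      Mat A_loc;

      MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);
      /* ... change values in A and reassemble ... */
      MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);
      MatDestroy(&A_loc);
.ve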
4958 4959 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4960 4961 @*/ 4962 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4963 { 4964 PetscErrorCode ierr; 4965 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4966 Mat_SeqAIJ *mat,*a,*b; 4967 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4968 MatScalar *aa,*ba,*cam; 4969 PetscScalar *ca; 4970 PetscMPIInt size; 4971 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4972 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4973 PetscBool match; 4974 4975 PetscFunctionBegin; 4976 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4977 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4978 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4979 if (size == 1) { 4980 if (scall == MAT_INITIAL_MATRIX) { 4981 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4982 *A_loc = mpimat->A; 4983 } else if (scall == MAT_REUSE_MATRIX) { 4984 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4985 } 4986 PetscFunctionReturn(0); 4987 } 4988 4989 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4990 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4991 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4992 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4993 aa = a->a; ba = b->a; 4994 if (scall == MAT_INITIAL_MATRIX) { 4995 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4996 ci[0] = 0; 4997 for (i=0; i<am; i++) { 4998 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4999 } 5000 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5001 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5002 k = 0; 5003 for (i=0; i<am; i++) { 5004 ncols_o = bi[i+1] - bi[i]; 5005 ncols_d = ai[i+1] - ai[i]; 5006 /* off-diagonal portion of A */ 5007 for (jo=0; jo<ncols_o; jo++) { 5008 col = cmap[*bj]; 5009 if (col >= cstart) break; 5010 cj[k] = col; bj++; 5011 ca[k++] = *ba++; 5012 } 5013 /* diagonal portion of A */ 5014 for (j=0; j<ncols_d; j++) { 5015 cj[k] = cstart + *aj++; 5016 ca[k++] = *aa++; 5017 } 5018 /* off-diagonal portion of A */ 5019 for (j=jo; j<ncols_o; j++) { 5020 cj[k] = cmap[*bj++]; 5021 ca[k++] = *ba++; 5022 } 5023 } 5024 /* put together the new matrix */ 5025 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5026 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5027 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5028 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5029 mat->free_a = PETSC_TRUE; 5030 mat->free_ij = PETSC_TRUE; 5031 mat->nonew = 0; 5032 } else if (scall == MAT_REUSE_MATRIX) { 5033 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5034 ci = mat->i; cj = mat->j; cam = mat->a; 5035 for (i=0; i<am; i++) { 5036 /* off-diagonal portion of A */ 5037 ncols_o = bi[i+1] - bi[i]; 5038 for (jo=0; jo<ncols_o; jo++) { 5039 col = cmap[*bj]; 5040 if (col >= cstart) break; 5041 *cam++ = *ba++; bj++; 5042 } 5043 /* diagonal portion of A */ 5044 ncols_d = ai[i+1] - ai[i]; 5045 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5046 /* off-diagonal portion of A */ 5047 for (j=jo; j<ncols_o; j++) { 5048 *cam++ = *ba++; bj++; 5049 } 5050 } 5051 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5052 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5053 PetscFunctionReturn(0); 5054 } 5055 5056 /*@C 5057 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5058 5059 Not Collective 5060 5061 Input Parameters: 5062 + A - the matrix 5063 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5064 - row, col - index sets of rows and columns to extract (or NULL) 5065 5066 Output Parameter: 5067 . A_loc - the local sequential matrix generated 5068 5069 Level: developer 5070 5071 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5072 5073 @*/ 5074 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5075 { 5076 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5077 PetscErrorCode ierr; 5078 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5079 IS isrowa,iscola; 5080 Mat *aloc; 5081 PetscBool match; 5082 5083 PetscFunctionBegin; 5084 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5085 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5086 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5087 if (!row) { 5088 start = A->rmap->rstart; end = A->rmap->rend; 5089 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5090 } else { 5091 isrowa = *row; 5092 } 5093 if (!col) { 5094 start = A->cmap->rstart; 5095 cmap = a->garray; 5096 nzA = a->A->cmap->n; 5097 nzB = a->B->cmap->n; 5098 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5099 ncols = 0; 5100 for (i=0; i<nzB; i++) { 5101 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5102 else break; 5103 } 5104 imark = i; 5105 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5106 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5107 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5108 } else { 5109 iscola = *col; 5110 } 5111 if (scall != MAT_INITIAL_MATRIX) { 5112 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5113 aloc[0] = *A_loc; 5114 } 5115 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5116 if (!col) { /* attach global id of condensed columns */ 5117 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5118 } 5119 *A_loc = aloc[0]; 5120 ierr = PetscFree(aloc);CHKERRQ(ierr); 5121 if (!row) { 5122 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5123 } 5124 if (!col) { 5125 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5126 } 5127 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5128 PetscFunctionReturn(0); 5129 } 5130 5131 /* 5132 * Destroy a mat that may be 
composed with PetscSF communication objects. 5133 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5134 * */ 5135 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5136 { 5137 PetscSF sf,osf; 5138 IS map; 5139 PetscErrorCode ierr; 5140 5141 PetscFunctionBegin; 5142 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5143 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5144 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5145 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5146 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5147 ierr = ISDestroy(&map);CHKERRQ(ierr); 5148 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5149 PetscFunctionReturn(0); 5150 } 5151 5152 /* 5153 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5154 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5155 * on a global size. 5156 * */ 5157 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5158 { 5159 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5160 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5161 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5162 PetscMPIInt owner; 5163 PetscSFNode *iremote,*oiremote; 5164 const PetscInt *lrowindices; 5165 PetscErrorCode ierr; 5166 PetscSF sf,osf; 5167 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5168 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5169 MPI_Comm comm; 5170 ISLocalToGlobalMapping mapping; 5171 5172 PetscFunctionBegin; 5173 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5174 /* plocalsize is the number of roots 5175 * nrows is the number of leaves 5176 * */ 5177 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5178 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5179 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5180 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5181 for (i=0;i<nrows;i++) { 5182 /* Find a remote index and an owner for a row 5183 * The row could be local or remote 5184 * */ 5185 owner = 0; 5186 lidx = 0; 5187 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5188 iremote[i].index = lidx; 5189 iremote[i].rank = owner; 5190 } 5191 /* Create SF to communicate how many nonzero columns for each row */ 5192 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5193 /* SF will figure out the number of nonzero colunms for each row, and their 5194 * offsets 5195 * */ 5196 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5197 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5198 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5199 5200 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5201 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5202 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5203 roffsets[0] = 0; 5204 roffsets[1] = 0; 5205 for (i=0;i<plocalsize;i++) { 5206 /* diag */ 5207 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5208 /* off diag */ 5209 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5210 /* compute offsets so that we relative location for each row */ 5211 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5212 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5213 } 5214 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5215 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5216 /* 'r' 
means root, and 'l' means leaf */ 5217 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5218 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5219 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5220 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5221 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5222 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5223 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5224 dntotalcols = 0; 5225 ontotalcols = 0; 5226 ncol = 0; 5227 for (i=0;i<nrows;i++) { 5228 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5229 ncol = PetscMax(pnnz[i],ncol); 5230 /* diag */ 5231 dntotalcols += nlcols[i*2+0]; 5232 /* off diag */ 5233 ontotalcols += nlcols[i*2+1]; 5234 } 5235 /* We do not need to figure the right number of columns 5236 * since all the calculations will be done by going through the raw data 5237 * */ 5238 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5239 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5240 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5241 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5242 /* diag */ 5243 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5244 /* off diag */ 5245 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5246 /* diag */ 5247 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5248 /* off diag */ 5249 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5250 dntotalcols = 0; 5251 ontotalcols = 0; 5252 ntotalcols = 0; 5253 for (i=0;i<nrows;i++) { 5254 owner = 0; 5255 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5256 /* Set iremote for diag matrix */ 5257 for (j=0;j<nlcols[i*2+0];j++) { 5258 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5259 iremote[dntotalcols].rank = owner; 5260 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5261 ilocal[dntotalcols++] = ntotalcols++; 5262 } 5263 /* off diag */ 5264 for (j=0;j<nlcols[i*2+1];j++) { 5265 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5266 oiremote[ontotalcols].rank = owner; 5267 oilocal[ontotalcols++] = ntotalcols++; 5268 } 5269 } 5270 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5271 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5272 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5273 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5274 /* P serves as roots and P_oth is leaves 5275 * Diag matrix 5276 * */ 5277 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5278 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5279 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5280 5281 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5282 /* Off diag */ 5283 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5284 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5285 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5286 /* We operate on the matrix internal data for saving memory */ 5287 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5288 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5289 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5290 /* Convert to global indices for diag matrix */ 5291 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5292 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5293 /* We want P_oth store global indices */ 5294 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5295 /* Use memory scalable approach */ 5296 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5297 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5298 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5299 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5300 /* Convert back to local indices */ 5301 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5302 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5303 nout = 0; 5304 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5305 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5306 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5307 /* Exchange values */ 5308 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5309 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5310 /* Stop PETSc from shrinking memory */ 5311 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5312 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5313 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5314 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5315 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5316 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5317 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5318 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5319 PetscFunctionReturn(0); 5320 } 5321 5322 /* 5323 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5324 * This supports MPIAIJ and MAIJ 5325 * */ 5326 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5327 { 5328 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5329 Mat_SeqAIJ *p_oth; 5330 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5331 IS rows,map; 5332 PetscHMapI hamp; 5333 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5334 MPI_Comm comm; 5335 PetscSF sf,osf; 5336 PetscBool has; 5337 PetscErrorCode ierr; 5338 5339 PetscFunctionBegin; 5340 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5341 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5342 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5343 * and then create a submatrix (that often is an overlapping matrix) 5344 * */ 5345 if (reuse==MAT_INITIAL_MATRIX) { 5346 /* Use a hash table to figure out unique keys */ 5347 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5348 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5349 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5350 count = 0; 5351 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5352 for (i=0;i<a->B->cmap->n;i++) { 5353 key = a->garray[i]/dof; 5354 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5355 if (!has) { 5356 mapping[i] = count; 5357 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5358 } else { 5359 /* Current 'i' has the same value the previous step */ 5360 mapping[i] = count-1; 5361 } 5362 } 5363 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5364 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5365 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5366 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5367 off = 0; 5368 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5369 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5370 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5371 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5372 /* In case, the matrix was already created but users want to recreate the matrix */ 5373 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5374 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5375 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5376 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5377 } else if (reuse==MAT_REUSE_MATRIX) { 5378 /* If matrix was already created, we simply update values using SF objects 5379 * that as attached to the matrix ealier. 5380 * */ 5381 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5382 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5383 if (!sf || !osf) { 5384 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5385 } 5386 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5387 /* Update values in place */ 5388 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5389 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5390 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5391 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5392 } else { 5393 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5394 } 5395 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5396 PetscFunctionReturn(0); 5397 } 5398 5399 /*@C 5400 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5401 5402 Collective on Mat 5403 5404 Input Parameters: 5405 + A,B - the matrices in mpiaij format 5406 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5407 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5408 5409 Output Parameter: 5410 + rowb, colb - index sets of rows and columns of B to extract 5411 - B_seq - the sequential matrix generated 5412 5413 Level: developer 5414 5415 @*/ 5416 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5417 { 5418 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5419 PetscErrorCode ierr; 5420 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5421 IS isrowb,iscolb; 5422 Mat *bseq=NULL; 5423 5424 PetscFunctionBegin; 5425 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5426 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5427 } 5428 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5429 5430 if (scall == MAT_INITIAL_MATRIX) { 5431 start = A->cmap->rstart; 5432 cmap = a->garray; 5433 nzA = a->A->cmap->n; 5434 nzB = a->B->cmap->n; 5435 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5436 ncols = 0; 5437 for (i=0; i<nzB; i++) { /* row < local row index */ 5438 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5439 else break; 5440 } 5441 imark = i; 5442 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5443 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5444 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5445 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5446 } else { 5447 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5448 isrowb = *rowb; iscolb = *colb; 5449 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5450 bseq[0] = *B_seq; 5451 } 5452 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5453 *B_seq = bseq[0]; 5454 ierr = PetscFree(bseq);CHKERRQ(ierr); 5455 if (!rowb) { 5456 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5457 } else { 5458 *rowb = isrowb; 5459 } 5460 if (!colb) { 5461 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5462 } else { 5463 *colb = iscolb; 5464 } 5465 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5466 PetscFunctionReturn(0); 5467 } 5468 5469 /* 5470 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5471 of the OFF-DIAGONAL portion of local A 5472 5473 Collective on Mat 5474 5475 Input Parameters: 5476 + A,B - the matrices in mpiaij format 5477 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5478 5479 Output Parameter: 5480 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5481 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5482 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5483 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5484 5485 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5486 for this matrix. This is not desirable.. 
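   A rough call pattern (an illustrative sketch only; the variable names are placeholders):
   the arrays obtained with MAT_INITIAL_MATRIX are passed back in with MAT_REUSE_MATRIX so
   that only the numerical values are re-communicated.

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth;

      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);
      /* ... values of B change ... */
      MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);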
5487 5488 Level: developer 5489 5490 */ 5491 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5492 { 5493 PetscErrorCode ierr; 5494 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5495 Mat_SeqAIJ *b_oth; 5496 VecScatter ctx; 5497 MPI_Comm comm; 5498 const PetscMPIInt *rprocs,*sprocs; 5499 const PetscInt *srow,*rstarts,*sstarts; 5500 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5501 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5502 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5503 MPI_Request *rwaits = NULL,*swaits = NULL; 5504 MPI_Status rstatus; 5505 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5506 5507 PetscFunctionBegin; 5508 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5509 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5510 5511 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5512 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5513 } 5514 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5515 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5516 5517 if (size == 1) { 5518 startsj_s = NULL; 5519 bufa_ptr = NULL; 5520 *B_oth = NULL; 5521 PetscFunctionReturn(0); 5522 } 5523 5524 ctx = a->Mvctx; 5525 tag = ((PetscObject)ctx)->tag; 5526 5527 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5528 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5529 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5530 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5531 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5532 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5533 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5534 5535 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5536 if (scall == MAT_INITIAL_MATRIX) { 5537 /* i-array */ 5538 /*---------*/ 5539 /* post receives */ 5540 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5541 for (i=0; i<nrecvs; i++) { 5542 rowlen = rvalues + rstarts[i]*rbs; 5543 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5544 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5545 } 5546 5547 /* pack the outgoing message */ 5548 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5549 5550 sstartsj[0] = 0; 5551 rstartsj[0] = 0; 5552 len = 0; /* total length of j or a array to be sent */ 5553 if (nsends) { 5554 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5555 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5556 } 5557 for (i=0; i<nsends; i++) { 5558 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5559 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5560 for (j=0; j<nrows; j++) { 5561 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5562 for (l=0; l<sbs; l++) { 5563 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5564 5565 rowlen[j*sbs+l] = ncols; 5566 5567 len += ncols; 5568 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5569 } 5570 k++; 5571 } 5572 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5573 5574 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5575 } 5576 /* recvs and sends of i-array are completed */ 5577 i = nrecvs; 5578 while (i--) { 5579 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5580 } 5581 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5582 ierr = PetscFree(svalues);CHKERRQ(ierr); 5583 5584 /* allocate buffers for sending j and a arrays */ 5585 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5586 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5587 5588 /* create i-array of B_oth */ 5589 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5590 5591 b_othi[0] = 0; 5592 len = 0; /* total length of j or a array to be received */ 5593 k = 0; 5594 for (i=0; i<nrecvs; i++) { 5595 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5596 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5597 for (j=0; j<nrows; j++) { 5598 b_othi[k+1] = b_othi[k] + rowlen[j]; 5599 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5600 k++; 5601 } 5602 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5603 } 5604 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5605 5606 /* allocate space for j and a arrrays of B_oth */ 5607 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5608 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5609 5610 /* j-array */ 5611 /*---------*/ 5612 /* post receives of j-array */ 5613 for (i=0; i<nrecvs; i++) { 5614 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5615 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5616 } 5617 5618 /* pack the outgoing message j-array */ 5619 if (nsends) k = sstarts[0]; 5620 for (i=0; i<nsends; i++) { 5621 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5622 bufJ = bufj+sstartsj[i]; 5623 for (j=0; j<nrows; j++) { 5624 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5625 for (ll=0; ll<sbs; ll++) { 5626 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5627 for (l=0; l<ncols; l++) { 5628 *bufJ++ = cols[l]; 5629 } 5630 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5631 } 5632 } 5633 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5634 } 5635 5636 /* recvs and sends of j-array are completed */ 5637 i = nrecvs; 5638 while (i--) { 5639 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5640 } 5641 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5642 } else if (scall == MAT_REUSE_MATRIX) { 5643 sstartsj = *startsj_s; 5644 rstartsj = *startsj_r; 5645 bufa = *bufa_ptr; 5646 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5647 b_otha = b_oth->a; 5648 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5649 5650 /* a-array */ 5651 /*---------*/ 5652 /* post receives of a-array */ 5653 for (i=0; i<nrecvs; i++) { 5654 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5655 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5656 } 5657 5658 /* pack the outgoing message a-array */ 5659 if (nsends) k = sstarts[0]; 5660 for (i=0; i<nsends; i++) { 5661 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5662 bufA = bufa+sstartsj[i]; 5663 for (j=0; j<nrows; j++) { 5664 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5665 for (ll=0; ll<sbs; ll++) { 5666 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5667 for (l=0; l<ncols; l++) { 5668 *bufA++ = vals[l]; 5669 } 5670 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5671 } 5672 } 5673 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5674 } 5675 /* recvs and sends of a-array are completed */ 5676 i = nrecvs; 5677 while (i--) { 5678 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5679 } 5680 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5681 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5682 5683 if (scall == MAT_INITIAL_MATRIX) { 5684 /* put together the new matrix */ 5685 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5686 5687 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5688 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5689 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5690 b_oth->free_a = PETSC_TRUE; 5691 b_oth->free_ij = PETSC_TRUE; 5692 b_oth->nonew = 0; 5693 5694 ierr = PetscFree(bufj);CHKERRQ(ierr); 5695 if (!startsj_s || !bufa_ptr) { 5696 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5697 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5698 } else { 5699 *startsj_s = sstartsj; 5700 *startsj_r = rstartsj; 5701 *bufa_ptr = bufa; 5702 } 5703 } 5704 5705 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5706 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5707 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5708 PetscFunctionReturn(0); 5709 } 5710 5711 /*@C 5712 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5713 5714 Not Collective 5715 5716 Input Parameters: 5717 . A - The matrix in mpiaij format 5718 5719 Output Parameter: 5720 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5721 . 
colmap - A map from global column index to local index into lvec 5722 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5723 5724 Level: developer 5725 5726 @*/ 5727 #if defined(PETSC_USE_CTABLE) 5728 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5729 #else 5730 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5731 #endif 5732 { 5733 Mat_MPIAIJ *a; 5734 5735 PetscFunctionBegin; 5736 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5737 PetscValidPointer(lvec, 2); 5738 PetscValidPointer(colmap, 3); 5739 PetscValidPointer(multScatter, 4); 5740 a = (Mat_MPIAIJ*) A->data; 5741 if (lvec) *lvec = a->lvec; 5742 if (colmap) *colmap = a->colmap; 5743 if (multScatter) *multScatter = a->Mvctx; 5744 PetscFunctionReturn(0); 5745 } 5746 5747 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5748 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5749 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5750 #if defined(PETSC_HAVE_MKL_SPARSE) 5751 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5752 #endif 5753 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5754 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5755 #if defined(PETSC_HAVE_ELEMENTAL) 5756 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5757 #endif 5758 #if defined(PETSC_HAVE_HYPRE) 5759 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5760 #endif 5761 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5762 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5763 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5764 5765 /* 5766 Computes (B'*A')' since computing B*A directly is untenable 5767 5768 n p p 5769 ( ) ( ) ( ) 5770 m ( A ) * n ( B ) = m ( C ) 5771 ( ) ( ) ( ) 5772 5773 */ 5774 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5775 { 5776 PetscErrorCode ierr; 5777 Mat At,Bt,Ct; 5778 5779 PetscFunctionBegin; 5780 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5781 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5782 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5783 ierr = MatDestroy(&At);CHKERRQ(ierr); 5784 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5785 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5786 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5787 PetscFunctionReturn(0); 5788 } 5789 5790 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5791 { 5792 PetscErrorCode ierr; 5793 PetscInt m=A->rmap->n,n=B->cmap->n; 5794 5795 PetscFunctionBegin; 5796 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5797 ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5798 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5799 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 5800 ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr); 5801 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5802 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5803 5804 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5805 PetscFunctionReturn(0); 5806 } 
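/*
   Example (a minimal sketch, not taken from the PETSc sources): with an assembled MATMPIDENSE matrix A
   and an assembled MATMPIAIJ matrix B whose row layout matches the column layout of A, the product
   C = A*B is realized by the symbolic/numeric routines above, which internally form C = (B^T A^T)^T:

     Mat C;
     ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/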
5807 5808 /* ----------------------------------------------------------------*/ 5809 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5810 { 5811 Mat_Product *product = C->product; 5812 Mat A = product->A,B=product->B; 5813 5814 PetscFunctionBegin; 5815 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5816 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5817 5818 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5819 C->ops->productsymbolic = MatProductSymbolic_AB; 5820 PetscFunctionReturn(0); 5821 } 5822 5823 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5824 { 5825 PetscErrorCode ierr; 5826 Mat_Product *product = C->product; 5827 5828 PetscFunctionBegin; 5829 if (product->type == MATPRODUCT_AB) { 5830 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 5831 } else SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type is not supported"); 5832 PetscFunctionReturn(0); 5833 } 5834 /* ----------------------------------------------------------------*/ 5835 5836 /*MC 5837 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5838 5839 Options Database Keys: 5840 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5841 5842 Level: beginner 5843 5844 Notes: 5845 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 5846 in this case the values associated with the rows and columns one passes in are set to zero 5847 in the matrix 5848 5849 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. 
In this case no 5850 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 5851 5852 .seealso: MatCreateAIJ() 5853 M*/ 5854 5855 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5856 { 5857 Mat_MPIAIJ *b; 5858 PetscErrorCode ierr; 5859 PetscMPIInt size; 5860 5861 PetscFunctionBegin; 5862 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5863 5864 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5865 B->data = (void*)b; 5866 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5867 B->assembled = PETSC_FALSE; 5868 B->insertmode = NOT_SET_VALUES; 5869 b->size = size; 5870 5871 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5872 5873 /* build cache for off array entries formed */ 5874 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5875 5876 b->donotstash = PETSC_FALSE; 5877 b->colmap = 0; 5878 b->garray = 0; 5879 b->roworiented = PETSC_TRUE; 5880 5881 /* stuff used for matrix vector multiply */ 5882 b->lvec = NULL; 5883 b->Mvctx = NULL; 5884 5885 /* stuff for MatGetRow() */ 5886 b->rowindices = 0; 5887 b->rowvalues = 0; 5888 b->getrowactive = PETSC_FALSE; 5889 5890 /* flexible pointer used in CUSP/CUSPARSE classes */ 5891 b->spptr = NULL; 5892 5893 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5894 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5896 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5897 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5898 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5899 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5900 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5901 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5902 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5903 #if defined(PETSC_HAVE_MKL_SPARSE) 5904 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5905 #endif 5906 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5907 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 5908 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5909 #if defined(PETSC_HAVE_ELEMENTAL) 5910 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5911 #endif 5912 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5913 ierr =
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5914 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5915 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5916 #if defined(PETSC_HAVE_HYPRE) 5917 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5918 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5919 #endif 5920 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 5921 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 5922 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5923 PetscFunctionReturn(0); 5924 } 5925 5926 /*@C 5927 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5928 and "off-diagonal" part of the matrix in CSR format. 5929 5930 Collective 5931 5932 Input Parameters: 5933 + comm - MPI communicator 5934 . m - number of local rows (Cannot be PETSC_DECIDE) 5935 . n - This value should be the same as the local size used in creating the 5936 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5937 calculated if N is given) For square matrices n is almost always m. 5938 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5939 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5940 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5941 . j - column indices 5942 . a - matrix values 5943 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5944 . oj - column indices 5945 - oa - matrix values 5946 5947 Output Parameter: 5948 . mat - the matrix 5949 5950 Level: advanced 5951 5952 Notes: 5953 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5954 must free the arrays once the matrix has been destroyed and not before. 5955 5956 The i and j indices are 0 based 5957 5958 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5959 5960 This sets local rows and cannot be used to set off-processor values. 5961 5962 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5963 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5964 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5965 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5966 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5967 communication if it is known that only local entries will be set. 
5968 5969 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5970 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5971 @*/ 5972 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5973 { 5974 PetscErrorCode ierr; 5975 Mat_MPIAIJ *maij; 5976 5977 PetscFunctionBegin; 5978 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5979 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5980 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5981 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5982 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5983 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5984 maij = (Mat_MPIAIJ*) (*mat)->data; 5985 5986 (*mat)->preallocated = PETSC_TRUE; 5987 5988 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5989 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5990 5991 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5992 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5993 5994 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5995 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5996 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5997 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5998 5999 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6000 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6001 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6002 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6003 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6004 PetscFunctionReturn(0); 6005 } 6006 6007 /* 6008 Special version for direct calls from Fortran 6009 */ 6010 #include <petsc/private/fortranimpl.h> 6011 6012 /* Change these macros so can be used in void function */ 6013 #undef CHKERRQ 6014 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6015 #undef SETERRQ2 6016 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6017 #undef SETERRQ3 6018 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6019 #undef SETERRQ 6020 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6021 6022 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6023 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6024 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6025 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6026 #else 6027 #endif 6028 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6029 { 6030 Mat mat = *mmat; 6031 PetscInt m = *mm, n = *mn; 6032 InsertMode addv = *maddv; 6033 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6034 PetscScalar value; 6035 PetscErrorCode ierr; 6036 6037 MatCheckPreallocated(mat,1); 6038 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6039 6040 #if defined(PETSC_USE_DEBUG) 6041 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6042 #endif 6043 { 6044 PetscInt i,j,rstart = mat->rmap->rstart,rend = 
mat->rmap->rend; 6045 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6046 PetscBool roworiented = aij->roworiented; 6047 6048 /* Some Variables required in the macro */ 6049 Mat A = aij->A; 6050 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6051 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6052 MatScalar *aa = a->a; 6053 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6054 Mat B = aij->B; 6055 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6056 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6057 MatScalar *ba = b->a; 6058 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6059 * cannot use "#if defined" inside a macro. */ 6060 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6061 6062 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6063 PetscInt nonew = a->nonew; 6064 MatScalar *ap1,*ap2; 6065 6066 PetscFunctionBegin; 6067 for (i=0; i<m; i++) { 6068 if (im[i] < 0) continue; 6069 #if defined(PETSC_USE_DEBUG) 6070 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6071 #endif 6072 if (im[i] >= rstart && im[i] < rend) { 6073 row = im[i] - rstart; 6074 lastcol1 = -1; 6075 rp1 = aj + ai[row]; 6076 ap1 = aa + ai[row]; 6077 rmax1 = aimax[row]; 6078 nrow1 = ailen[row]; 6079 low1 = 0; 6080 high1 = nrow1; 6081 lastcol2 = -1; 6082 rp2 = bj + bi[row]; 6083 ap2 = ba + bi[row]; 6084 rmax2 = bimax[row]; 6085 nrow2 = bilen[row]; 6086 low2 = 0; 6087 high2 = nrow2; 6088 6089 for (j=0; j<n; j++) { 6090 if (roworiented) value = v[i*n+j]; 6091 else value = v[i+j*m]; 6092 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6093 if (in[j] >= cstart && in[j] < cend) { 6094 col = in[j] - cstart; 6095 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6096 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6097 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6098 #endif 6099 } else if (in[j] < 0) continue; 6100 #if defined(PETSC_USE_DEBUG) 6101 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6102 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6103 #endif 6104 else { 6105 if (mat->was_assembled) { 6106 if (!aij->colmap) { 6107 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6108 } 6109 #if defined(PETSC_USE_CTABLE) 6110 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6111 col--; 6112 #else 6113 col = aij->colmap[in[j]] - 1; 6114 #endif 6115 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6116 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6117 col = in[j]; 6118 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6119 B = aij->B; 6120 b = (Mat_SeqAIJ*)B->data; 6121 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6122 rp2 = bj + bi[row]; 6123 ap2 = ba + bi[row]; 6124 rmax2 = bimax[row]; 6125 nrow2 = bilen[row]; 6126 low2 = 0; 6127 high2 = nrow2; 6128 bm = aij->B->rmap->n; 6129 ba = b->a; 6130 inserted = PETSC_FALSE; 6131 } 6132 } else col = in[j]; 6133 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6134 #if defined(PETSC_HAVE_VIENNACL) || 
defined(PETSC_HAVE_CUDA) 6135 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 6136 #endif 6137 } 6138 } 6139 } else if (!aij->donotstash) { 6140 if (roworiented) { 6141 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6142 } else { 6143 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6144 } 6145 } 6146 } 6147 } 6148 PetscFunctionReturnVoid(); 6149 } 6150
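/*
   Example (a hedged sketch, not part of the PETSc test suite) of MatCreateMPIAIJWithSplitArrays() from the
   manual page above: two processes, each owning two rows and two columns of a 4x4 matrix. The "diagonal"
   block is given with local column indices, while the "off-diagonal" block is given with global column
   indices (the routine creates that SeqAIJ block with the full global column count before assembly
   compacts it). The variables rank and ierr are assumed to be declared; the arrays must remain valid
   until the matrix is destroyed.

     PetscInt    di[3] = {0,1,2}, dj[2] = {0,1};
     PetscScalar da[2] = {1.0,3.0};
     PetscInt    oi[3] = {0,1,2}, oj[2];
     PetscScalar oa[2] = {2.0,4.0};
     Mat         A;

     oj[0] = rank ? 0 : 2;   global column of the first off-process entry on this rank
     oj[1] = rank ? 1 : 3;
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,di,dj,da,oi,oj,oa,&A);CHKERRQ(ierr);
     ierr  = MatDestroy(&A);CHKERRQ(ierr);
*/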