#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
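/*
   Note on the storage layout used throughout this file: an MPIAIJ matrix keeps its locally owned rows
   in two sequential AIJ matrices, the diagonal block aij->A (columns owned by this process, stored with
   the column offset cmap->rstart removed) and the off-diagonal block aij->B (all remaining columns,
   compressed and renumbered locally, with aij->garray[] mapping a local column of B back to its global
   index).  The sketch below is illustrative only, not code used by the library; it shows how routines
   in this file typically recover global column numbers from the two blocks.

     Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
     Mat_SeqAIJ *ad  = (Mat_SeqAIJ*)aij->A->data,*bd = (Mat_SeqAIJ*)aij->B->data;
     PetscInt   i,k,gcol;

     for (i=0; i<mat->rmap->n; i++) {
       for (k=ad->i[i]; k<ad->i[i+1]; k++) gcol = mat->cmap->rstart + ad->j[k];   (diagonal block)
       for (k=bd->i[i]; k<bd->i[i+1]; k++) gcol = aij->garray[bd->j[k]];          (off-diagonal block)
     }
*/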
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
  ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=NULL;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries */
      ierr = PetscArrayzero(olens,m);CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                  ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];  ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i];  ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                    ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i];  ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each process
  has an order-N integer array, but access is fast).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
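/*
   Illustrative sketch (not code used by the library): once the colmap created above exists, a global
   column index gcol belonging to the off-diagonal part is translated into a local column of aij->B as
   follows; the +1/-1 shifts let 0 mean "not present".  MatSetValues_MPIAIJ() and MatGetValues_MPIAIJ()
   below perform exactly this lookup.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif

   A negative lcol means the column is not (yet) present in the off-diagonal block.
*/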
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)       \
{ \
    if (col <= lastcol1)  low1 = 0;     \
    else                 high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else              low1  = t; \
    } \
      for (_i=low1; _i<high1; _i++) { \
        if (rp1[_i] > col) break; \
        if (rp1[_i] == col) { \
          if (addv == ADD_VALUES) { \
            ap1[_i] += value;   \
            /* Not sure whether PetscLogFlops() will slow down the code or not */ \
            (void)PetscLogFlops(1.0);   \
          } \
          else                    ap1[_i] = value; \
          inserted = PETSC_TRUE; \
          goto a_noinsert; \
        } \
      }  \
      if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
      if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
      if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
      MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
      N = nrow1++ - 1; a->nz++; high1++; \
      /* shift up all the later entries in this row */ \
      ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
      ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
      rp1[_i] = col;  \
      ap1[_i] = value;  \
      A->nonzerostate++;\
      a_noinsert: ; \
      ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    if (col <= lastcol2) low2 = 0;                        \
    else high2 = nrow2;                                   \
    lastcol2 = col;                                       \
    while (high2-low2 > 5) {                              \
      t = (low2+high2)/2;                                 \
      if (rp2[t] > col) high2 = t;                        \
      else             low2  = t;                         \
    }                                                     \
    for (_i=low2; _i<high2; _i++) {                       \
      if (rp2[_i] > col) break;                           \
      if (rp2[_i] == col) {                               \
        if (addv == ADD_VALUES) {                         \
          ap2[_i] += value;                               \
          (void)PetscLogFlops(1.0);                       \
        }                                                 \
        else                    ap2[_i] = value;          \
        inserted = PETSC_TRUE;                            \
        goto b_noinsert;                                  \
      }                                                   \
    }                                                     \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++;                    \
    /* shift up all the later entries in this row */      \
    ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
    rp2[_i] = col;                                        \
    ap2[_i] = value;                                      \
    B->nonzerostate++;                                    \
    b_noinsert: ;                                         \
    bilen[row] = nrow2;                                   \
  }

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value = 0.0;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some variables required in the macros */
  Mat        A     = aij->A;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa   = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B     = aij->B;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba   = b->a;
  /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
   * cannot use "#if defined" inside a macro. */
  PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
    if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        } else if (in[j] < 0) continue;
        else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
              inserted = PETSC_FALSE;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
          if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
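/*
   Illustrative usage sketch (hypothetical caller code with a user matrix "mat", global row grow and
   global column gcol; not code used by the library).  Entries may be set with global indices, including
   rows owned by other processes; such entries are held in mat->stash until MatAssemblyBegin()/
   MatAssemblyEnd() communicate them, unless MAT_NO_OFF_PROC_ENTRIES or MAT_IGNORE_OFF_PROC_ENTRIES was
   set with MatSetOption().

     ierr = MatSetValues(mat,1,&grow,1,&gcol,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/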
/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ*)mat->data;
  Mat        A      = aij->A; /* diagonal part of the matrix */
  Mat        B      = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd  = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
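/*
   Illustrative usage sketch (hypothetical caller code, not used by the library): MatGetValues() on an
   MPIAIJ matrix can only return values from rows owned by the calling process, so callers normally
   restrict their queries to the local ownership range first.

     PetscInt    rstart,rend;
     PetscScalar val;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     if (row >= rstart && row < rend) {
       ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
     }
*/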
extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
    ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
  }
#endif
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     If the nonzero structure of submatrix B cannot change, then we know that
     no processor disassembled, so we can skip this step.
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
      aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
#endif
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = NULL;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
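/*
   Illustrative usage sketch (hypothetical caller code, not used by the library): zero a set of rows
   given by global indices, place the value diag on the diagonal of those rows, and, because x and b
   are passed, also fix the right-hand side so the zeroed rows satisfy diag*x = b (the
   bb[lrows[r]] = diag*xx[lrows[r]] step above).

     PetscInt rows[2] = {0,5};                       (hypothetical global row numbers)
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/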
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscMPIInt       p = 0;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
  }
  /* loop over all elements of off-process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
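/*
   Relation behind the multiply routines above (notation local to this comment): write the locally owned
   rows as [A_d | B_o], where A_d is the diagonal block aij->A and B_o the off-diagonal block aij->B, and
   let lvec = scatter(x) hold the off-process entries of x that this process needs.  Then

      MatMult:           y_local = A_d*x_local + B_o*lvec         (the scatter overlaps with A_d*x_local)
      MatMultTranspose:  y = A_d^T*x_local plus the contributions B_o^T*x_local, which belong to rows
                         owned by other processes and are therefore scattered back with ADD_VALUES and
                         SCATTER_REVERSE.

   This is why MatMultTranspose_MPIAIJ() forms the off-diagonal product into lvec first and only then
   adds the scattered partial results into yy.
*/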
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt       header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
  PetscInt       *rowlens;
  PetscInt       *colidxs;
  PetscScalar    *matvals;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M; header[2] = N; header[3] = nz;
  ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);

  /* fill in and store row lengths */
  ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
  for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(rowlens);CHKERRQ(ierr);

  /* fill in and store column indices */
  ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      colidxs[cnt++] = A->j[ja] + cs;
    for (; jb<B->i[i+1]; jb++)
      colidxs[cnt++] = garray[B->j[jb]];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
  ierr = PetscFree(colidxs);CHKERRQ(ierr);

  /* fill in and store nonzero values */
  ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
  for (cnt=0, i=0; i<m; i++) {
    for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = B->a[jb];
    }
    for (ja=A->i[i]; ja<A->i[i+1]; ja++)
      matvals[cnt++] = A->a[ja];
    for (; jb<B->i[i+1]; jb++)
      matvals[cnt++] = B->a[jb];
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
  ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
  ierr = PetscFree(matvals);CHKERRQ(ierr);

  /* write block size option to the viewer's .info file */
  ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
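/*
   For reference, summarizing the writes performed above (this is a description of the code, not an
   independent file-format specification): MatView_MPIAIJ_Binary() produces

     header[4] = {MAT_FILE_CLASSID, M, N, global number of nonzeros}   written as PETSC_INT
     rowlens[M]     number of nonzeros of every global row
     colidxs[nnz]   global column indices, row by row, in increasing order within each row
     matvals[nnz]   the nonzero values in the same order

   with the block sizes written separately to the viewer's .info file by MatView_Binary_BlockSizes().
*/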
#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

entire matrix onto first processor */ 1469 Mat A = NULL, Av; 1470 IS isrow,iscol; 1471 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1473 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1474 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1475 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1476 /* The commented code uses MatCreateSubMatrices instead */ 1477 /* 1478 Mat *AA, A = NULL, Av; 1479 IS isrow,iscol; 1480 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1482 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1483 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1484 if (!rank) { 1485 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1486 A = AA[0]; 1487 Av = AA[0]; 1488 } 1489 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1490 */ 1491 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1492 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1493 /* 1494 Everyone has to call to draw the matrix since the graphics waits are 1495 synchronized across all processors that share the PetscDraw object 1496 */ 1497 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1498 if (!rank) { 1499 if (((PetscObject)mat)->name) { 1500 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1501 } 1502 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1503 } 1504 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1505 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1506 ierr = MatDestroy(&A);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1512 { 1513 PetscErrorCode ierr; 1514 PetscBool iascii,isdraw,issocket,isbinary; 1515 1516 PetscFunctionBegin; 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1520 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1521 if (iascii || isdraw || isbinary || issocket) { 1522 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1523 } 1524 PetscFunctionReturn(0); 1525 } 1526 1527 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1528 { 1529 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1530 PetscErrorCode ierr; 1531 Vec bb1 = NULL; 1532 PetscBool hasop; 1533 1534 PetscFunctionBegin; 1535 if (flag == SOR_APPLY_UPPER) { 1536 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1537 PetscFunctionReturn(0); 1538 } 1539 1540 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1541 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1542 } 1543 1544 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1545 if (flag & SOR_ZERO_INITIAL_GUESS) { 1546 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1547 its--; 1548 } 1549 1550 while (its--) { 1551 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 
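      /* the scatter started above (and completed next) fills mat->lvec with the off-process entries of xx that the off-diagonal block B needs; this is the same communication pattern used by MatMult_MPIAIJ */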
ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1553 1554 /* update rhs: bb1 = bb - B*x */ 1555 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1556 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1557 1558 /* local sweep */ 1559 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1560 } 1561 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1562 if (flag & SOR_ZERO_INITIAL_GUESS) { 1563 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1564 its--; 1565 } 1566 while (its--) { 1567 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1569 1570 /* update rhs: bb1 = bb - B*x */ 1571 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1572 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1573 1574 /* local sweep */ 1575 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1576 } 1577 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1578 if (flag & SOR_ZERO_INITIAL_GUESS) { 1579 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1580 its--; 1581 } 1582 while (its--) { 1583 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 1586 /* update rhs: bb1 = bb - B*x */ 1587 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1588 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1589 1590 /* local sweep */ 1591 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1592 } 1593 } else if (flag & SOR_EISENSTAT) { 1594 Vec xx1; 1595 1596 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1597 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1598 1599 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1601 if (!mat->diag) { 1602 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1603 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1604 } 1605 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1606 if (hasop) { 1607 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1608 } else { 1609 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1610 } 1611 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1612 1613 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1614 1615 /* local sweep */ 1616 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1617 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1618 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1619 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1620 1621 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1622 1623 matin->factorerrortype = mat->A->factorerrortype; 1624 PetscFunctionReturn(0); 1625 } 1626 1627 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1628 { 1629 Mat aA,aB,Aperm; 1630 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1631 PetscScalar *aa,*ba; 1632 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1633 PetscSF rowsf,sf; 1634 IS parcolp = NULL; 1635 PetscBool done; 1636 PetscErrorCode ierr; 1637 1638 PetscFunctionBegin; 1639 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1640 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1641 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1642 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1643 1644 /* Invert row permutation to find out where my rows should go */ 1645 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1646 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1647 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1648 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1649 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1651 1652 /* Invert column permutation to find out where my columns should go */ 1653 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1654 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1655 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1656 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1657 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1659 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1660 1661 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1662 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1663 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1664 1665 /* Find out where my gcols should go */ 1666 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1667 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1668 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1669 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1670 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1671 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1673 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1674 1675 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1677 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1678 for (i=0; i<m; i++) { 1679 PetscInt row = rdest[i]; 1680 PetscMPIInt rowner; 1681 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1682 for (j=ai[i]; j<ai[i+1]; j++) { 1683 PetscInt col = cdest[aj[j]]; 1684 PetscMPIInt cowner; 1685 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1686 if (rowner == cowner) dnnz[i]++; 1687 else onnz[i]++; 1688 } 1689 for (j=bi[i]; j<bi[i+1]; j++) { 1690 PetscInt col = gcdest[bj[j]]; 1691 PetscMPIInt cowner; 1692 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1693 if (rowner == cowner) dnnz[i]++; 1694 else onnz[i]++; 1695 } 1696 } 1697 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1701 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1702 1703 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1704 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1705 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1706 for (i=0; i<m; i++) { 1707 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1708 PetscInt j0,rowlen; 1709 rowlen = ai[i+1] - ai[i]; 1710 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1711 for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1712 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1713 } 1714 rowlen = bi[i+1] - bi[i]; 1715 for (j0=j=0; j<rowlen; j0=j) { 1716 for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1717 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1718 } 1719 } 1720 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1723 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1725 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1726 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1727 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1728 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1729 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1730 *B = Aperm; 1731 PetscFunctionReturn(0); 1732 } 1733 1734 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1735 { 1736 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1737 PetscErrorCode ierr; 1738 1739 PetscFunctionBegin; 1740 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1741 if (ghosts) *ghosts = aij->garray; 1742 PetscFunctionReturn(0); 1743 } 1744 1745 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1746 { 1747 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1748 Mat A = mat->A,B = mat->B; 1749 PetscErrorCode ierr; 1750 PetscLogDouble isend[5],irecv[5]; 1751 1752 PetscFunctionBegin; 1753 info->block_size = 1.0; 1754 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1755 1756 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1757 isend[3] = info->memory; isend[4] = info->mallocs; 1758 1759 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1760 1761 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1762 isend[3] += info->memory; isend[4] += info->mallocs; 1763 if (flag == MAT_LOCAL) { 1764 info->nz_used = isend[0]; 1765 info->nz_allocated = isend[1]; 1766 info->nz_unneeded = isend[2]; 1767 info->memory = isend[3]; 1768 info->mallocs = isend[4]; 1769 } else if (flag == MAT_GLOBAL_MAX) { 1770 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1771 1772 info->nz_used = irecv[0]; 1773 info->nz_allocated = irecv[1]; 1774 info->nz_unneeded = irecv[2]; 1775 info->memory = irecv[3]; 1776 info->mallocs = irecv[4]; 1777 } else if (flag == MAT_GLOBAL_SUM) { 1778 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1779 1780 info->nz_used = irecv[0]; 1781 info->nz_allocated = irecv[1]; 1782 info->nz_unneeded = 
irecv[2]; 1783 info->memory = irecv[3]; 1784 info->mallocs = irecv[4]; 1785 } 1786 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1787 info->fill_ratio_needed = 0; 1788 info->factor_mallocs = 0; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1793 { 1794 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1795 PetscErrorCode ierr; 1796 1797 PetscFunctionBegin; 1798 switch (op) { 1799 case MAT_NEW_NONZERO_LOCATIONS: 1800 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1801 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1802 case MAT_KEEP_NONZERO_PATTERN: 1803 case MAT_NEW_NONZERO_LOCATION_ERR: 1804 case MAT_USE_INODES: 1805 case MAT_IGNORE_ZERO_ENTRIES: 1806 MatCheckPreallocated(A,1); 1807 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1808 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1809 break; 1810 case MAT_ROW_ORIENTED: 1811 MatCheckPreallocated(A,1); 1812 a->roworiented = flg; 1813 1814 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1815 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1816 break; 1817 case MAT_NEW_DIAGONALS: 1818 case MAT_SORTED_FULL: 1819 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1820 break; 1821 case MAT_IGNORE_OFF_PROC_ENTRIES: 1822 a->donotstash = flg; 1823 break; 1824 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1825 case MAT_SPD: 1826 case MAT_SYMMETRIC: 1827 case MAT_STRUCTURALLY_SYMMETRIC: 1828 case MAT_HERMITIAN: 1829 case MAT_SYMMETRY_ETERNAL: 1830 break; 1831 case MAT_SUBMAT_SINGLEIS: 1832 A->submat_singleis = flg; 1833 break; 1834 case MAT_STRUCTURE_ONLY: 1835 /* The option is handled directly by MatSetOption() */ 1836 break; 1837 default: 1838 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1839 } 1840 PetscFunctionReturn(0); 1841 } 1842 1843 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1844 { 1845 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1846 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1847 PetscErrorCode ierr; 1848 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1849 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1850 PetscInt *cmap,*idx_p; 1851 1852 PetscFunctionBegin; 1853 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1854 mat->getrowactive = PETSC_TRUE; 1855 1856 if (!mat->rowvalues && (idx || v)) { 1857 /* 1858 allocate enough space to hold information from the longest row. 
1859 */ 1860 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1861 PetscInt max = 1,tmp; 1862 for (i=0; i<matin->rmap->n; i++) { 1863 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1864 if (max < tmp) max = tmp; 1865 } 1866 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1867 } 1868 1869 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1870 lrow = row - rstart; 1871 1872 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1873 if (!v) {pvA = NULL; pvB = NULL;} 1874 if (!idx) {pcA = NULL; if (!v) pcB = NULL;} 1875 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1876 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1877 nztot = nzA + nzB; 1878 1879 cmap = mat->garray; 1880 if (v || idx) { 1881 if (nztot) { 1882 /* Sort by increasing column numbers, assuming A and B already sorted */ 1883 PetscInt imark = -1; 1884 if (v) { 1885 *v = v_p = mat->rowvalues; 1886 for (i=0; i<nzB; i++) { 1887 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1888 else break; 1889 } 1890 imark = i; 1891 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1892 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1893 } 1894 if (idx) { 1895 *idx = idx_p = mat->rowindices; 1896 if (imark > -1) { 1897 for (i=0; i<imark; i++) { 1898 idx_p[i] = cmap[cworkB[i]]; 1899 } 1900 } else { 1901 for (i=0; i<nzB; i++) { 1902 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1903 else break; 1904 } 1905 imark = i; 1906 } 1907 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1908 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1909 } 1910 } else { 1911 if (idx) *idx = NULL; 1912 if (v) *v = NULL; 1913 } 1914 } 1915 *nz = nztot; 1916 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1917 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1922 { 1923 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1924 1925 PetscFunctionBegin; 1926 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1927 aij->getrowactive = PETSC_FALSE; 1928 PetscFunctionReturn(0); 1929 } 1930 1931 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1932 { 1933 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1934 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1935 PetscErrorCode ierr; 1936 PetscInt i,j,cstart = mat->cmap->rstart; 1937 PetscReal sum = 0.0; 1938 MatScalar *v; 1939 1940 PetscFunctionBegin; 1941 if (aij->size == 1) { 1942 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1943 } else { 1944 if (type == NORM_FROBENIUS) { 1945 v = amat->a; 1946 for (i=0; i<amat->nz; i++) { 1947 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1948 } 1949 v = bmat->a; 1950 for (i=0; i<bmat->nz; i++) { 1951 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1952 } 1953 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1954 *norm = PetscSqrtReal(*norm); 1955 ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr); 1956 } else if (type == NORM_1) { /* max column norm */ 1957 PetscReal *tmp,*tmp2; 1958 PetscInt *jj,*garray = aij->garray; 1959 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1960 ierr = 
PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1961 *norm = 0.0; 1962 v = amat->a; jj = amat->j; 1963 for (j=0; j<amat->nz; j++) { 1964 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1965 } 1966 v = bmat->a; jj = bmat->j; 1967 for (j=0; j<bmat->nz; j++) { 1968 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1969 } 1970 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 for (j=0; j<mat->cmap->N; j++) { 1972 if (tmp2[j] > *norm) *norm = tmp2[j]; 1973 } 1974 ierr = PetscFree(tmp);CHKERRQ(ierr); 1975 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1976 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1977 } else if (type == NORM_INFINITY) { /* max row norm */ 1978 PetscReal ntemp = 0.0; 1979 for (j=0; j<aij->A->rmap->n; j++) { 1980 v = amat->a + amat->i[j]; 1981 sum = 0.0; 1982 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1983 sum += PetscAbsScalar(*v); v++; 1984 } 1985 v = bmat->a + bmat->i[j]; 1986 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1987 sum += PetscAbsScalar(*v); v++; 1988 } 1989 if (sum > ntemp) ntemp = sum; 1990 } 1991 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1992 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1993 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1999 { 2000 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2001 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2002 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2003 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2004 PetscErrorCode ierr; 2005 Mat B,A_diag,*B_diag; 2006 const MatScalar *array; 2007 2008 PetscFunctionBegin; 2009 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2010 ai = Aloc->i; aj = Aloc->j; 2011 bi = Bloc->i; bj = Bloc->j; 2012 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2013 PetscInt *d_nnz,*g_nnz,*o_nnz; 2014 PetscSFNode *oloc; 2015 PETSC_UNUSED PetscSF sf; 2016 2017 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2018 /* compute d_nnz for preallocation */ 2019 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2020 for (i=0; i<ai[ma]; i++) { 2021 d_nnz[aj[i]]++; 2022 } 2023 /* compute local off-diagonal contributions */ 2024 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2025 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2026 /* map those to global */ 2027 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2028 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2029 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2030 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2031 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2033 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2034 2035 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2036 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2037 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2038 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2039 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2040 ierr = 
PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2041 } else { 2042 B = *matout; 2043 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2044 } 2045 2046 b = (Mat_MPIAIJ*)B->data; 2047 A_diag = a->A; 2048 B_diag = &b->A; 2049 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2050 A_diag_ncol = A_diag->cmap->N; 2051 B_diag_ilen = sub_B_diag->ilen; 2052 B_diag_i = sub_B_diag->i; 2053 2054 /* Set ilen for diagonal of B */ 2055 for (i=0; i<A_diag_ncol; i++) { 2056 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2057 } 2058 2059 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2060 very quickly (=without using MatSetValues), because all writes are local. */ 2061 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2062 2063 /* copy over the B part */ 2064 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2065 array = Bloc->a; 2066 row = A->rmap->rstart; 2067 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2068 cols_tmp = cols; 2069 for (i=0; i<mb; i++) { 2070 ncol = bi[i+1]-bi[i]; 2071 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2072 row++; 2073 array += ncol; cols_tmp += ncol; 2074 } 2075 ierr = PetscFree(cols);CHKERRQ(ierr); 2076 2077 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2079 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2080 *matout = B; 2081 } else { 2082 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2083 } 2084 PetscFunctionReturn(0); 2085 } 2086 2087 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2088 { 2089 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2090 Mat a = aij->A,b = aij->B; 2091 PetscErrorCode ierr; 2092 PetscInt s1,s2,s3; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2096 if (rr) { 2097 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2098 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2099 /* Overlap communication with computation. 
*/ 2100 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2101 } 2102 if (ll) { 2103 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2104 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2105 ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr); 2106 } 2107 /* scale the diagonal block */ 2108 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2109 2110 if (rr) { 2111 /* Do a scatter end and then right scale the off-diagonal block */ 2112 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2113 ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr); 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2119 { 2120 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2125 PetscFunctionReturn(0); 2126 } 2127 2128 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2129 { 2130 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2131 Mat a,b,c,d; 2132 PetscBool flg; 2133 PetscErrorCode ierr; 2134 2135 PetscFunctionBegin; 2136 a = matA->A; b = matA->B; 2137 c = matB->A; d = matB->B; 2138 2139 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2140 if (flg) { 2141 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2142 } 2143 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2144 PetscFunctionReturn(0); 2145 } 2146 2147 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2148 { 2149 PetscErrorCode ierr; 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2151 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2152 2153 PetscFunctionBegin; 2154 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2155 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2156 /* because of the column compression in the off-processor part of the matrix a->B, 2157 the number of columns in a->B and b->B may be different, hence we cannot call 2158 the MatCopy() directly on the two parts. If need be, we can provide a more 2159 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2160 then copying the submatrices */ 2161 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2162 } else { 2163 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2164 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2165 } 2166 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2171 { 2172 PetscErrorCode ierr; 2173 2174 PetscFunctionBegin; 2175 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr); 2176 PetscFunctionReturn(0); 2177 } 2178 2179 /* 2180 Computes the number of nonzeros per row needed for preallocation when X and Y 2181 have different nonzero structure. 
2182 */ 2183 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2184 { 2185 PetscInt i,j,k,nzx,nzy; 2186 2187 PetscFunctionBegin; 2188 /* Set the number of nonzeros in the new matrix */ 2189 for (i=0; i<m; i++) { 2190 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2191 nzx = xi[i+1] - xi[i]; 2192 nzy = yi[i+1] - yi[i]; 2193 nnz[i] = 0; 2194 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2195 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2196 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2197 nnz[i]++; 2198 } 2199 for (; k<nzy; k++) nnz[i]++; 2200 } 2201 PetscFunctionReturn(0); 2202 } 2203 2204 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2205 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2206 { 2207 PetscErrorCode ierr; 2208 PetscInt m = Y->rmap->N; 2209 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2210 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2211 2212 PetscFunctionBegin; 2213 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2214 PetscFunctionReturn(0); 2215 } 2216 2217 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2218 { 2219 PetscErrorCode ierr; 2220 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2221 PetscBLASInt bnz,one=1; 2222 Mat_SeqAIJ *x,*y; 2223 2224 PetscFunctionBegin; 2225 if (str == SAME_NONZERO_PATTERN) { 2226 PetscScalar alpha = a; 2227 x = (Mat_SeqAIJ*)xx->A->data; 2228 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2229 y = (Mat_SeqAIJ*)yy->A->data; 2230 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2231 x = (Mat_SeqAIJ*)xx->B->data; 2232 y = (Mat_SeqAIJ*)yy->B->data; 2233 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2234 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2236 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2237 will be updated */ 2238 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2239 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2240 Y->offloadmask = PETSC_OFFLOAD_CPU; 2241 } 2242 #endif 2243 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2244 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2245 } else { 2246 Mat B; 2247 PetscInt *nnz_d,*nnz_o; 2248 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2249 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2250 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2251 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2252 ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 } 
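/*
   A minimal usage sketch for the MatAXPY() paths implemented above (illustrative only:
   the matrices X and Y are hypothetical, assumed to be assembled MATMPIAIJ matrices with
   identical row and column layouts):

     Mat         X,Y;                 (created, preallocated and assembled elsewhere)
     PetscScalar alpha = 2.0;
     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the routine above applies a plain BLAS axpy to the stored
   values of the diagonal (A) and off-diagonal (B) blocks; SUBSET_NONZERO_PATTERN falls
   back to MatAXPY_Basic(); any other MatStructure value recomputes the preallocation,
   assembles into a fresh matrix, and swaps it in with MatHeaderReplace().
*/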

extern PetscErrorCode MatConjugate_SeqAIJ(Mat);

PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
#if defined(PETSC_USE_COMPLEX)
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;

  PetscFunctionBegin;
  ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
  ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
#else
  PetscFunctionBegin;
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = NULL;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = NULL;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {
    ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
  }
  ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ     *mat    = (Mat_MPIAIJ*) A->data;
  PetscInt       n       = A->rmap->n;
  PetscInt       cstart  = A->cmap->rstart;
  PetscInt       *cmap   = mat->garray;
  PetscInt       *diagIdx, *offdiagIdx;
  Vec            diagV, offdiagV;
  PetscScalar    *a, *diagA, *offdiagA;
  PetscInt       r;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
  ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
  ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
  ierr = VecGetArray(v, &a);CHKERRQ(ierr);
  ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) {
    if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
      a[r]   = diagA[r];
      idx[r] = cstart + diagIdx[r];
    } else {
      a[r]   = offdiagA[r];
      idx[r] = cmap[offdiagIdx[r]];
    }
  }
  ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
  ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
  ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
  ierr = VecDestroy(&diagV);CHKERRQ(ierr);
  ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
  ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            *dummy;

  PetscFunctionBegin;
  ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
  *newmat = *dummy;
  ierr = PetscFree(dummy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
  A->factorerrortype = a->A->factorerrortype;
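  /* the inverted block diagonal lives entirely in the local diagonal part a->A, so its factorization error state (e.g. a detected zero pivot) is the one reported for A */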
PetscFunctionReturn(0); 2479 } 2480 2481 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2482 { 2483 PetscErrorCode ierr; 2484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2485 2486 PetscFunctionBegin; 2487 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2488 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2489 if (x->assembled) { 2490 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2491 } else { 2492 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2493 } 2494 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2495 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2500 { 2501 PetscFunctionBegin; 2502 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2503 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /*@ 2508 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2509 2510 Collective on Mat 2511 2512 Input Parameters: 2513 + A - the matrix 2514 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2515 2516 Level: advanced 2517 2518 @*/ 2519 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2529 { 2530 PetscErrorCode ierr; 2531 PetscBool sc = PETSC_FALSE,flg; 2532 2533 PetscFunctionBegin; 2534 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2535 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2536 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2537 if (flg) { 2538 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2539 } 2540 ierr = PetscOptionsTail();CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2545 { 2546 PetscErrorCode ierr; 2547 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2548 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2549 2550 PetscFunctionBegin; 2551 if (!Y->preallocated) { 2552 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2553 } else if (!aij->nz) { 2554 PetscInt nonew = aij->nonew; 2555 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2556 aij->nonew = nonew; 2557 } 2558 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2563 { 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2569 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2570 if (d) { 2571 PetscInt rstart; 2572 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2573 *d += rstart; 2574 2575 } 2576 
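  /* the missing-diagonal location d is returned in the global row numbering of A */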
PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2580 { 2581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2582 PetscErrorCode ierr; 2583 2584 PetscFunctionBegin; 2585 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 /* -------------------------------------------------------------------*/ 2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2591 MatGetRow_MPIAIJ, 2592 MatRestoreRow_MPIAIJ, 2593 MatMult_MPIAIJ, 2594 /* 4*/ MatMultAdd_MPIAIJ, 2595 MatMultTranspose_MPIAIJ, 2596 MatMultTransposeAdd_MPIAIJ, 2597 NULL, 2598 NULL, 2599 NULL, 2600 /*10*/ NULL, 2601 NULL, 2602 NULL, 2603 MatSOR_MPIAIJ, 2604 MatTranspose_MPIAIJ, 2605 /*15*/ MatGetInfo_MPIAIJ, 2606 MatEqual_MPIAIJ, 2607 MatGetDiagonal_MPIAIJ, 2608 MatDiagonalScale_MPIAIJ, 2609 MatNorm_MPIAIJ, 2610 /*20*/ MatAssemblyBegin_MPIAIJ, 2611 MatAssemblyEnd_MPIAIJ, 2612 MatSetOption_MPIAIJ, 2613 MatZeroEntries_MPIAIJ, 2614 /*24*/ MatZeroRows_MPIAIJ, 2615 NULL, 2616 NULL, 2617 NULL, 2618 NULL, 2619 /*29*/ MatSetUp_MPIAIJ, 2620 NULL, 2621 NULL, 2622 MatGetDiagonalBlock_MPIAIJ, 2623 NULL, 2624 /*34*/ MatDuplicate_MPIAIJ, 2625 NULL, 2626 NULL, 2627 NULL, 2628 NULL, 2629 /*39*/ MatAXPY_MPIAIJ, 2630 MatCreateSubMatrices_MPIAIJ, 2631 MatIncreaseOverlap_MPIAIJ, 2632 MatGetValues_MPIAIJ, 2633 MatCopy_MPIAIJ, 2634 /*44*/ MatGetRowMax_MPIAIJ, 2635 MatScale_MPIAIJ, 2636 MatShift_MPIAIJ, 2637 MatDiagonalSet_MPIAIJ, 2638 MatZeroRowsColumns_MPIAIJ, 2639 /*49*/ MatSetRandom_MPIAIJ, 2640 NULL, 2641 NULL, 2642 NULL, 2643 NULL, 2644 /*54*/ MatFDColoringCreate_MPIXAIJ, 2645 NULL, 2646 MatSetUnfactored_MPIAIJ, 2647 MatPermute_MPIAIJ, 2648 NULL, 2649 /*59*/ MatCreateSubMatrix_MPIAIJ, 2650 MatDestroy_MPIAIJ, 2651 MatView_MPIAIJ, 2652 NULL, 2653 NULL, 2654 /*64*/ NULL, 2655 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2656 NULL, 2657 NULL, 2658 NULL, 2659 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2660 MatGetRowMinAbs_MPIAIJ, 2661 NULL, 2662 NULL, 2663 NULL, 2664 NULL, 2665 /*75*/ MatFDColoringApply_AIJ, 2666 MatSetFromOptions_MPIAIJ, 2667 NULL, 2668 NULL, 2669 MatFindZeroDiagonals_MPIAIJ, 2670 /*80*/ NULL, 2671 NULL, 2672 NULL, 2673 /*83*/ MatLoad_MPIAIJ, 2674 MatIsSymmetric_MPIAIJ, 2675 NULL, 2676 NULL, 2677 NULL, 2678 NULL, 2679 /*89*/ NULL, 2680 NULL, 2681 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2682 NULL, 2683 NULL, 2684 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2685 NULL, 2686 NULL, 2687 NULL, 2688 MatBindToCPU_MPIAIJ, 2689 /*99*/ MatProductSetFromOptions_MPIAIJ, 2690 NULL, 2691 NULL, 2692 MatConjugate_MPIAIJ, 2693 NULL, 2694 /*104*/MatSetValuesRow_MPIAIJ, 2695 MatRealPart_MPIAIJ, 2696 MatImaginaryPart_MPIAIJ, 2697 NULL, 2698 NULL, 2699 /*109*/NULL, 2700 NULL, 2701 MatGetRowMin_MPIAIJ, 2702 NULL, 2703 MatMissingDiagonal_MPIAIJ, 2704 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2705 NULL, 2706 MatGetGhosts_MPIAIJ, 2707 NULL, 2708 NULL, 2709 /*119*/NULL, 2710 NULL, 2711 NULL, 2712 NULL, 2713 MatGetMultiProcBlock_MPIAIJ, 2714 /*124*/MatFindNonzeroRows_MPIAIJ, 2715 MatGetColumnNorms_MPIAIJ, 2716 MatInvertBlockDiagonal_MPIAIJ, 2717 MatInvertVariableBlockDiagonal_MPIAIJ, 2718 MatCreateSubMatricesMPI_MPIAIJ, 2719 /*129*/NULL, 2720 NULL, 2721 NULL, 2722 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2723 NULL, 2724 /*134*/NULL, 2725 NULL, 2726 NULL, 2727 NULL, 2728 NULL, 2729 /*139*/MatSetBlockSizes_MPIAIJ, 2730 NULL, 2731 NULL, 2732 MatFDColoringSetUp_MPIXAIJ, 2733 MatFindOffBlockDiagonalEntries_MPIAIJ, 2734 
MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2735 /*145*/NULL, 2736 NULL, 2737 NULL 2738 }; 2739 2740 /* ----------------------------------------------------------------------------------------*/ 2741 2742 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2743 { 2744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2749 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2754 { 2755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2760 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2765 { 2766 Mat_MPIAIJ *b; 2767 PetscErrorCode ierr; 2768 PetscMPIInt size; 2769 2770 PetscFunctionBegin; 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 b = (Mat_MPIAIJ*)B->data; 2774 2775 #if defined(PETSC_USE_CTABLE) 2776 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2777 #else 2778 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2779 #endif 2780 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2781 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2782 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2783 2784 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2785 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2786 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2789 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2790 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2792 2793 if (!B->preallocated) { 2794 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2795 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2796 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2797 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2798 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2799 } 2800 2801 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2802 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2803 B->preallocated = PETSC_TRUE; 2804 B->was_assembled = PETSC_FALSE; 2805 B->assembled = PETSC_FALSE; 2806 PetscFunctionReturn(0); 2807 } 2808 2809 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2810 { 2811 Mat_MPIAIJ *b; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2816 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2818 b = (Mat_MPIAIJ*)B->data; 2819 2820 #if defined(PETSC_USE_CTABLE) 2821 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2822 #else 2823 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2824 #endif 2825 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2826 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2827 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2828 2829 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2830 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2831 B->preallocated = PETSC_TRUE; 2832 B->was_assembled = PETSC_FALSE; 2833 B->assembled = PETSC_FALSE; 2834 PetscFunctionReturn(0); 2835 } 2836 2837 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2838 { 2839 Mat mat; 2840 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 *newmat = NULL; 2845 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2846 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2847 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2848 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2849 a = (Mat_MPIAIJ*)mat->data; 2850 2851 mat->factortype = matin->factortype; 2852 mat->assembled = matin->assembled; 2853 mat->insertmode = NOT_SET_VALUES; 2854 mat->preallocated = matin->preallocated; 2855 2856 a->size = oldmat->size; 2857 a->rank = oldmat->rank; 2858 a->donotstash = oldmat->donotstash; 2859 a->roworiented = oldmat->roworiented; 2860 a->rowindices = NULL; 2861 a->rowvalues = NULL; 2862 a->getrowactive = PETSC_FALSE; 2863 2864 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2865 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2866 2867 if (oldmat->colmap) { 2868 #if defined(PETSC_USE_CTABLE) 2869 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2870 #else 2871 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2872 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2873 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2874 #endif 2875 } else a->colmap = NULL; 2876 if (oldmat->garray) { 2877 PetscInt len; 2878 len = oldmat->B->cmap->n; 2879 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2880 
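      /* copy the column map: garray[i] is the global column index of local column i of the off-diagonal block B */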
ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2881 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2882 } else a->garray = NULL; 2883 2884 /* It may happen MatDuplicate is called with a non-assembled matrix 2885 In fact, MatDuplicate only requires the matrix to be preallocated 2886 This may happen inside a DMCreateMatrix_Shell */ 2887 if (oldmat->lvec) { 2888 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2890 } 2891 if (oldmat->Mvctx) { 2892 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2894 } 2895 if (oldmat->Mvctx_mpi1) { 2896 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2898 } 2899 2900 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2902 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2904 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2905 *newmat = mat; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2910 { 2911 PetscBool isbinary, ishdf5; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2916 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2917 /* force binary viewer to load .info file if it has not yet done so */ 2918 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2919 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2920 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2921 if (isbinary) { 2922 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2923 } else if (ishdf5) { 2924 #if defined(PETSC_HAVE_HDF5) 2925 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2926 #else 2927 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2928 #endif 2929 } else { 2930 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2931 } 2932 PetscFunctionReturn(0); 2933 } 2934 2935 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2936 { 2937 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2938 PetscInt *rowidxs,*colidxs; 2939 PetscScalar *matvals; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2944 2945 /* read in matrix header */ 2946 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2947 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2948 M = header[1]; N = header[2]; nz = header[3]; 2949 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2950 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2951 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2952 2953 /* set block sizes from the viewer's .info file */ 2954 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2955 /* set global sizes if not set already */ 2956 if (mat->rmap->N < 0) mat->rmap->N = M; 2957 if (mat->cmap->N < 0) mat->cmap->N = N; 2958 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2960 2961 /* check if the matrix sizes are correct */ 2962 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2963 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2964 2965 /* read in row lengths and build row indices */ 2966 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2967 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2968 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2969 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2970 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2971 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2972 /* read in column indices and matrix values */ 2973 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2974 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2975 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2976 /* store matrix indices and values */ 2977 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2978 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2979 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2985 { 2986 PetscErrorCode ierr; 2987 IS iscol_local; 2988 PetscBool isstride; 2989 PetscMPIInt lisstride=0,gisstride; 2990 2991 PetscFunctionBegin; 2992 /* check if we are grabbing all columns*/ 2993 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2994 2995 if (isstride) { 2996 PetscInt start,len,mstart,mlen; 2997 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2998 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2999 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3000 if (mstart == start && mlen-mstart == len) lisstride = 1; 3001 } 3002 3003 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3004 if (gisstride) { 3005 PetscInt N; 3006 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3007 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3008 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3009 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3010 } else { 3011 PetscInt cbs; 3012 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3013 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3014 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3015 } 3016 3017 *isseq = iscol_local; 3018 PetscFunctionReturn(0); 3019 } 3020 3021 /* 3022 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3023 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3024 3025 Input Parameters: 3026 mat - matrix 3027 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3028 i.e., mat->rstart <= isrow[i] < mat->rend 3029 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3030 i.e., mat->cstart <= iscol[i] < mat->cend 3031 Output Parameter: 3032 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3033 iscol_o - sequential column index set for retrieving mat->B 3034 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3035 */ 3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3037 { 3038 PetscErrorCode ierr; 3039 Vec x,cmap; 3040 const PetscInt *is_idx; 3041 PetscScalar *xarray,*cmaparray; 3042 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3043 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3044 Mat B=a->B; 3045 Vec lvec=a->lvec,lcmap; 3046 PetscInt i,cstart,cend,Bn=B->cmap->N; 3047 MPI_Comm comm; 3048 VecScatter Mvctx=a->Mvctx; 3049 3050 PetscFunctionBegin; 3051 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3052 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3053 3054 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3055 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3056 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3057 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3058 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3059 3060 /* Get start indices */ 3061 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3062 isstart -= ncols; 3063 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3064 3065 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3066 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3067 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3068 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3069 for (i=0; i<ncols; i++) { 3070 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3071 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3072 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3073 } 3074 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3075 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3076 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3077 3078 /* Get iscol_d */ 3079 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3080 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3081 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3082 3083 /* Get isrow_d */ 3084 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3085 rstart = mat->rmap->rstart; 3086 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3087 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3088 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3089 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3090 3091 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3092 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3094 3095 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3096 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 3099 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3100 3101 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 3104 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3105 /* off-process column indices */ 3106 count = 0; 3107 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3108 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3109 3110 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3111 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3112 for (i=0; i<Bn; i++) { 3113 if (PetscRealPart(xarray[i]) > -1.0) { 3114 idx[count] = i; /* local column index in off-diagonal part B */ 3115 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3116 count++; 3117 } 3118 } 3119 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3121 3122 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3123 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3124 3125 ierr = PetscFree(idx);CHKERRQ(ierr); 3126 *garray = cmap1; 3127 3128 ierr = VecDestroy(&x);CHKERRQ(ierr); 3129 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3130 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3131 PetscFunctionReturn(0); 3132 } 3133 3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3136 { 3137 PetscErrorCode ierr; 3138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3139 Mat M = NULL; 3140 MPI_Comm comm; 3141 IS iscol_d,isrow_d,iscol_o; 3142 Mat Asub = NULL,Bsub = NULL; 3143 PetscInt n; 3144 3145 PetscFunctionBegin; 3146 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3147 3148 if (call == MAT_REUSE_MATRIX) { 3149 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3150 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3151 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3152 3153 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3154 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3155 3156 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3157 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3158 3159 /* Update diagonal and off-diagonal portions of submat */ 3160 asub = (Mat_MPIAIJ*)(*submat)->data; 3161 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3162 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3163 if (n) { 3164 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3165 } 3166 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 3169 } else { /* call == MAT_INITIAL_MATRIX) */ 3170 const PetscInt *garray; 3171 PetscInt BsubN; 3172 3173 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3174 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3175 3176 /* Create local submatrices Asub and Bsub */ 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3178 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3179 3180 /* Create submatrix M */ 3181 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3182 3183 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3184 asub = (Mat_MPIAIJ*)M->data; 3185 3186 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3187 n = asub->B->cmap->N; 3188 if (BsubN > n) { 3189 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3190 const PetscInt *idx; 3191 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3192 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3193 3194 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3195 j = 0; 3196 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3197 for (i=0; i<n; i++) { 3198 if (j >= BsubN) break; 3199 while (subgarray[i] > garray[j]) j++; 3200 3201 if (subgarray[i] == garray[j]) { 3202 idx_new[i] = idx[j++]; 3203 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3204 } 3205 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3206 3207 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3209 3210 } else if (BsubN < n) { 3211 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3212 } 3213 3214 ierr = PetscFree(garray);CHKERRQ(ierr); 3215 *submat = M; 3216 3217 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3218 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3219 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3220 3221 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3222 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3223 3224 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3225 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3226 } 3227 PetscFunctionReturn(0); 3228 } 3229 3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3231 { 3232 PetscErrorCode ierr; 3233 IS iscol_local=NULL,isrow_d; 3234 PetscInt csize; 3235 PetscInt n,i,j,start,end; 3236 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3237 MPI_Comm comm; 3238 3239 PetscFunctionBegin; 3240 /* If isrow has same processor distribution as mat, 3241 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3242 if (call == MAT_REUSE_MATRIX) { 3243 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3244 if (isrow_d) { 3245 sameRowDist = PETSC_TRUE; 3246 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3247 } else { 3248 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3249 if (iscol_local) { 3250 sameRowDist = PETSC_TRUE; 3251 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3252 } 3253 } 3254 } else { 3255 /* Check if isrow has same processor distribution as mat */ 3256 sameDist[0] = 
PETSC_FALSE; 3257 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3258 if (!n) { 3259 sameDist[0] = PETSC_TRUE; 3260 } else { 3261 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3262 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3263 if (i >= start && j < end) { 3264 sameDist[0] = PETSC_TRUE; 3265 } 3266 } 3267 3268 /* Check if iscol has same processor distribution as mat */ 3269 sameDist[1] = PETSC_FALSE; 3270 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3271 if (!n) { 3272 sameDist[1] = PETSC_TRUE; 3273 } else { 3274 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3275 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3276 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3277 } 3278 3279 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3280 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3281 sameRowDist = tsameDist[0]; 3282 } 3283 3284 if (sameRowDist) { 3285 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3286 /* isrow and iscol have same processor distribution as mat */ 3287 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } else { /* sameRowDist */ 3290 /* isrow has same processor distribution as mat */ 3291 if (call == MAT_INITIAL_MATRIX) { 3292 PetscBool sorted; 3293 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3294 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3295 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3296 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3297 3298 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3299 if (sorted) { 3300 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3301 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 } else { /* call == MAT_REUSE_MATRIX */ 3305 IS iscol_sub; 3306 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3307 if (iscol_sub) { 3308 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3309 PetscFunctionReturn(0); 3310 } 3311 } 3312 } 3313 } 3314 3315 /* General case: iscol -> iscol_local which has global size of iscol */ 3316 if (call == MAT_REUSE_MATRIX) { 3317 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3318 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3319 } else { 3320 if (!iscol_local) { 3321 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3322 } 3323 } 3324 3325 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3326 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3327 3328 if (call == MAT_INITIAL_MATRIX) { 3329 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3330 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3331 } 3332 PetscFunctionReturn(0); 3333 } 3334 3335 /*@C 3336 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3337 and "off-diagonal" part of the matrix in CSR format. 3338 3339 Collective 3340 3341 Input Parameters: 3342 + comm - MPI communicator 3343 . 
A - "diagonal" portion of matrix 3344 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3345 - garray - global index of B columns 3346 3347 Output Parameter: 3348 . mat - the matrix, with input A as its local diagonal matrix 3349 Level: advanced 3350 3351 Notes: 3352 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3353 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3354 3355 .seealso: MatCreateMPIAIJWithSplitArrays() 3356 @*/ 3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3358 { 3359 PetscErrorCode ierr; 3360 Mat_MPIAIJ *maij; 3361 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3362 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3363 PetscScalar *oa=b->a; 3364 Mat Bnew; 3365 PetscInt m,n,N; 3366 3367 PetscFunctionBegin; 3368 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3369 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3370 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3371 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3372 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3373 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3374 3375 /* Get global columns of mat */ 3376 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3377 3378 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3379 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3380 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3381 maij = (Mat_MPIAIJ*)(*mat)->data; 3382 3383 (*mat)->preallocated = PETSC_TRUE; 3384 3385 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3386 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3387 3388 /* Set A as diagonal portion of *mat */ 3389 maij->A = A; 3390 3391 nz = oi[m]; 3392 for (i=0; i<nz; i++) { 3393 col = oj[i]; 3394 oj[i] = garray[col]; 3395 } 3396 3397 /* Set Bnew as off-diagonal portion of *mat */ 3398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3399 bnew = (Mat_SeqAIJ*)Bnew->data; 3400 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3401 maij->B = Bnew; 3402 3403 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3404 3405 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3406 b->free_a = PETSC_FALSE; 3407 b->free_ij = PETSC_FALSE; 3408 ierr = MatDestroy(&B);CHKERRQ(ierr); 3409 3410 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3411 bnew->free_a = PETSC_TRUE; 3412 bnew->free_ij = PETSC_TRUE; 3413 3414 /* condense columns of maij->B */ 3415 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3416 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3419 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } 3422 3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3424 
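/*
    MatCreateSubMatrix_MPIAIJ_SameRowDist - used by MatCreateSubMatrix_MPIAIJ() when isrow has the
    same processor distribution as mat. iscol_local (the sequential, gathered form of iscol) must be
    sorted, may contain duplicate indices, and is only consulted when call == MAT_INITIAL_MATRIX;
    with MAT_REUSE_MATRIX the routine instead retrieves the previously composed "SubIScol", "Subcmap"
    and "SubMatrix" objects from *newmat.
*/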
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3426 { 3427 PetscErrorCode ierr; 3428 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3429 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3430 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3431 Mat M,Msub,B=a->B; 3432 MatScalar *aa; 3433 Mat_SeqAIJ *aij; 3434 PetscInt *garray = a->garray,*colsub,Ncols; 3435 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3436 IS iscol_sub,iscmap; 3437 const PetscInt *is_idx,*cmap; 3438 PetscBool allcolumns=PETSC_FALSE; 3439 MPI_Comm comm; 3440 3441 PetscFunctionBegin; 3442 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3443 3444 if (call == MAT_REUSE_MATRIX) { 3445 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3446 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3447 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3448 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3450 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3451 3452 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3453 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3454 3455 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3456 3457 } else { /* call == MAT_INITIAL_MATRIX) */ 3458 PetscBool flg; 3459 3460 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3461 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3462 3463 /* (1) iscol -> nonscalable iscol_local */ 3464 /* Check for special case: each processor gets entire matrix columns */ 3465 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3466 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3467 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3468 if (allcolumns) { 3469 iscol_sub = iscol_local; 3470 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3471 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3472 3473 } else { 3474 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3475 PetscInt *idx,*cmap1,k; 3476 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3477 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3478 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3479 count = 0; 3480 k = 0; 3481 for (i=0; i<Ncols; i++) { 3482 j = is_idx[i]; 3483 if (j >= cstart && j < cend) { 3484 /* diagonal part of mat */ 3485 idx[count] = j; 3486 cmap1[count++] = i; /* column index in submat */ 3487 } else if (Bn) { 3488 /* off-diagonal part of mat */ 3489 if (j == garray[k]) { 3490 idx[count] = j; 3491 cmap1[count++] = i; /* column index in submat */ 3492 } else if (j > garray[k]) { 3493 while (j > garray[k] && k < Bn-1) k++; 3494 if (j == garray[k]) { 3495 idx[count] = j; 3496 cmap1[count++] = i; /* column index in submat */ 3497 } 3498 } 3499 } 3500 } 3501 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3502 3503 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3504 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3505 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3506 3507 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3508 } 3509 3510 /* (3) Create sequential Msub */ 3511 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3512 } 3513 3514 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3515 aij = (Mat_SeqAIJ*)(Msub)->data; 3516 ii = aij->i; 3517 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3518 3519 /* 3520 m - number of local rows 3521 Ncols - number of columns (same on all processors) 3522 rstart - first row in new global matrix generated 3523 */ 3524 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3525 3526 if (call == MAT_INITIAL_MATRIX) { 3527 /* (4) Create parallel newmat */ 3528 PetscMPIInt rank,size; 3529 PetscInt csize; 3530 3531 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3532 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3533 3534 /* 3535 Determine the number of non-zeros in the diagonal and off-diagonal 3536 portions of the matrix in order to do correct preallocation 3537 */ 3538 3539 /* first get start and end of "diagonal" columns */ 3540 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3541 if (csize == PETSC_DECIDE) { 3542 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3543 if (mglobal == Ncols) { /* square matrix */ 3544 nlocal = m; 3545 } else { 3546 nlocal = Ncols/size + ((Ncols % size) > rank); 3547 } 3548 } else { 3549 nlocal = csize; 3550 } 3551 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3552 rstart = rend - nlocal; 3553 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3554 3555 /* next, compute all the lengths */ 3556 jj = aij->j; 3557 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3558 olens = dlens + m; 3559 for (i=0; i<m; i++) { 3560 jend = ii[i+1] - ii[i]; 3561 olen = 0; 3562 dlen = 0; 3563 for (j=0; j<jend; j++) { 3564 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3565 else dlen++; 3566 jj++; 3567 } 3568 olens[i] = olen; 3569 dlens[i] = dlen; 3570 } 3571 3572 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3573 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3574 3575 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3576 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
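    /* M inherits mat's type and is preallocated with the diagonal/off-diagonal row lengths (dlens/olens) computed above */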
3577 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3578 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3579 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3580 ierr = PetscFree(dlens);CHKERRQ(ierr); 3581 3582 } else { /* call == MAT_REUSE_MATRIX */ 3583 M = *newmat; 3584 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3585 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3586 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3587 /* 3588 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3589 rather than the slower MatSetValues(). 3590 */ 3591 M->was_assembled = PETSC_TRUE; 3592 M->assembled = PETSC_FALSE; 3593 } 3594 3595 /* (5) Set values of Msub to *newmat */ 3596 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3597 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3598 3599 jj = aij->j; 3600 aa = aij->a; 3601 for (i=0; i<m; i++) { 3602 row = rstart + i; 3603 nz = ii[i+1] - ii[i]; 3604 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3605 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3606 jj += nz; aa += nz; 3607 } 3608 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3609 3610 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 3613 ierr = PetscFree(colsub);CHKERRQ(ierr); 3614 3615 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3616 if (call == MAT_INITIAL_MATRIX) { 3617 *newmat = M; 3618 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3619 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3620 3621 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3622 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3623 3624 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3625 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3626 3627 if (iscol_local) { 3628 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3629 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3630 } 3631 } 3632 PetscFunctionReturn(0); 3633 } 3634 3635 /* 3636 Not great since it makes two copies of the submatrix, first an SeqAIJ 3637 in local and then by concatenating the local matrices the end result. 3638 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3639 3640 Note: This requires a sequential iscol with all indices. 
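      The local sequential submatrix is composed on *newmat as "SubMatrix" so that a later
      MAT_REUSE_MATRIX call can update it in place instead of rebuilding it.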
3641 */ 3642 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3643 { 3644 PetscErrorCode ierr; 3645 PetscMPIInt rank,size; 3646 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3647 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3648 Mat M,Mreuse; 3649 MatScalar *aa,*vwork; 3650 MPI_Comm comm; 3651 Mat_SeqAIJ *aij; 3652 PetscBool colflag,allcolumns=PETSC_FALSE; 3653 3654 PetscFunctionBegin; 3655 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3656 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3658 3659 /* Check for special case: each processor gets entire matrix columns */ 3660 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3661 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3662 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3663 ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3664 3665 if (call == MAT_REUSE_MATRIX) { 3666 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3667 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3669 } else { 3670 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3671 } 3672 3673 /* 3674 m - number of local rows 3675 n - number of columns (same on all processors) 3676 rstart - first row in new global matrix generated 3677 */ 3678 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3679 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3680 if (call == MAT_INITIAL_MATRIX) { 3681 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3682 ii = aij->i; 3683 jj = aij->j; 3684 3685 /* 3686 Determine the number of non-zeros in the diagonal and off-diagonal 3687 portions of the matrix in order to do correct preallocation 3688 */ 3689 3690 /* first get start and end of "diagonal" columns */ 3691 if (csize == PETSC_DECIDE) { 3692 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3693 if (mglobal == n) { /* square matrix */ 3694 nlocal = m; 3695 } else { 3696 nlocal = n/size + ((n % size) > rank); 3697 } 3698 } else { 3699 nlocal = csize; 3700 } 3701 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3702 rstart = rend - nlocal; 3703 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3704 3705 /* next, compute all the lengths */ 3706 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3707 olens = dlens + m; 3708 for (i=0; i<m; i++) { 3709 jend = ii[i+1] - ii[i]; 3710 olen = 0; 3711 dlen = 0; 3712 for (j=0; j<jend; j++) { 3713 if (*jj < rstart || *jj >= rend) olen++; 3714 else dlen++; 3715 jj++; 3716 } 3717 olens[i] = olen; 3718 dlens[i] = dlen; 3719 } 3720 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3721 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3722 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3723 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3724 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3725 ierr = PetscFree(dlens);CHKERRQ(ierr); 3726 } else { 3727 PetscInt ml,nl; 3728 3729 M = *newmat; 3730 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3731 if (ml 
!= m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3732 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3733 /* 3734 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3735 rather than the slower MatSetValues(). 3736 */ 3737 M->was_assembled = PETSC_TRUE; 3738 M->assembled = PETSC_FALSE; 3739 } 3740 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3741 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3742 ii = aij->i; 3743 jj = aij->j; 3744 aa = aij->a; 3745 for (i=0; i<m; i++) { 3746 row = rstart + i; 3747 nz = ii[i+1] - ii[i]; 3748 cwork = jj; jj += nz; 3749 vwork = aa; aa += nz; 3750 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3751 } 3752 3753 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3754 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 *newmat = M; 3756 3757 /* save submatrix used in processor for next request */ 3758 if (call == MAT_INITIAL_MATRIX) { 3759 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3760 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3766 { 3767 PetscInt m,cstart, cend,j,nnz,i,d; 3768 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3769 const PetscInt *JJ; 3770 PetscErrorCode ierr; 3771 PetscBool nooffprocentries; 3772 3773 PetscFunctionBegin; 3774 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3775 3776 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3777 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3778 m = B->rmap->n; 3779 cstart = B->cmap->rstart; 3780 cend = B->cmap->rend; 3781 rstart = B->rmap->rstart; 3782 3783 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3784 3785 if (PetscDefined(USE_DEBUG)) { 3786 for (i=0; i<m; i++) { 3787 nnz = Ii[i+1]- Ii[i]; 3788 JJ = J + Ii[i]; 3789 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3790 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3791 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3792 } 3793 } 3794 3795 for (i=0; i<m; i++) { 3796 nnz = Ii[i+1]- Ii[i]; 3797 JJ = J + Ii[i]; 3798 nnz_max = PetscMax(nnz_max,nnz); 3799 d = 0; 3800 for (j=0; j<nnz; j++) { 3801 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3802 } 3803 d_nnz[i] = d; 3804 o_nnz[i] = nnz - d; 3805 } 3806 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3807 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3808 3809 for (i=0; i<m; i++) { 3810 ii = i + rstart; 3811 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? 
v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3812 } 3813 nooffprocentries = B->nooffprocentries; 3814 B->nooffprocentries = PETSC_TRUE; 3815 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3816 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3817 B->nooffprocentries = nooffprocentries; 3818 3819 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3820 PetscFunctionReturn(0); 3821 } 3822 3823 /*@ 3824 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3825 (the default parallel PETSc format). 3826 3827 Collective 3828 3829 Input Parameters: 3830 + B - the matrix 3831 . i - the indices into j for the start of each local row (starts with zero) 3832 . j - the column indices for each local row (starts with zero) 3833 - v - optional values in the matrix 3834 3835 Level: developer 3836 3837 Notes: 3838 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3839 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3840 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3841 3842 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3843 3844 The format which is used for the sparse matrix input, is equivalent to a 3845 row-major ordering.. i.e for the following matrix, the input data expected is 3846 as shown 3847 3848 $ 1 0 0 3849 $ 2 0 3 P0 3850 $ ------- 3851 $ 4 5 6 P1 3852 $ 3853 $ Process0 [P0]: rows_owned=[0,1] 3854 $ i = {0,1,3} [size = nrow+1 = 2+1] 3855 $ j = {0,0,2} [size = 3] 3856 $ v = {1,2,3} [size = 3] 3857 $ 3858 $ Process1 [P1]: rows_owned=[2] 3859 $ i = {0,3} [size = nrow+1 = 1+1] 3860 $ j = {0,1,2} [size = 3] 3861 $ v = {4,5,6} [size = 3] 3862 3863 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3864 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3865 @*/ 3866 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3867 { 3868 PetscErrorCode ierr; 3869 3870 PetscFunctionBegin; 3871 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3872 PetscFunctionReturn(0); 3873 } 3874 3875 /*@C 3876 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3877 (the default parallel PETSc format). For good matrix assembly performance 3878 the user should preallocate the matrix storage by setting the parameters 3879 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3880 performance can be increased by more than a factor of 50. 3881 3882 Collective 3883 3884 Input Parameters: 3885 + B - the matrix 3886 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3887 (same value is used for all local rows) 3888 . d_nnz - array containing the number of nonzeros in the various rows of the 3889 DIAGONAL portion of the local submatrix (possibly different for each row) 3890 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3891 The size of this array is equal to the number of local rows, i.e 'm'. 3892 For matrices that will be factored, you must leave room for (and set) 3893 the diagonal entry even if it is zero. 3894 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
        submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
        OFF-DIAGONAL portion of the local submatrix (possibly different for
        each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
        structure. The size of this array is equal to the number
        of local rows, i.e., 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input is equivalent to a
   row-major ordering; i.e., for the following matrix, the input data expected is
   as shown:

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format. Only the numerical values are updated; the other arrays must be identical
   to those used when the matrix was originally assembled.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.
J - column indices 4100 - v - matrix values 4101 4102 Level: intermediate 4103 4104 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4105 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4106 @*/ 4107 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4108 { 4109 PetscErrorCode ierr; 4110 PetscInt cstart,nnz,i,j; 4111 PetscInt *ld; 4112 PetscBool nooffprocentries; 4113 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4114 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4115 PetscScalar *ad = Ad->a, *ao = Ao->a; 4116 const PetscInt *Adi = Ad->i; 4117 PetscInt ldi,Iii,md; 4118 4119 PetscFunctionBegin; 4120 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4121 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4122 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4123 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4124 4125 cstart = mat->cmap->rstart; 4126 if (!Aij->ld) { 4127 /* count number of entries below block diagonal */ 4128 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4129 Aij->ld = ld; 4130 for (i=0; i<m; i++) { 4131 nnz = Ii[i+1]- Ii[i]; 4132 j = 0; 4133 while (J[j] < cstart && j < nnz) {j++;} 4134 J += nnz; 4135 ld[i] = j; 4136 } 4137 } else { 4138 ld = Aij->ld; 4139 } 4140 4141 for (i=0; i<m; i++) { 4142 nnz = Ii[i+1]- Ii[i]; 4143 Iii = Ii[i]; 4144 ldi = ld[i]; 4145 md = Adi[i+1]-Adi[i]; 4146 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4147 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4148 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4149 ad += md; 4150 ao += nnz - md; 4151 } 4152 nooffprocentries = mat->nooffprocentries; 4153 mat->nooffprocentries = PETSC_TRUE; 4154 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4155 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4156 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4157 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4158 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4159 mat->nooffprocentries = nooffprocentries; 4160 PetscFunctionReturn(0); 4161 } 4162 4163 /*@C 4164 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4165 (the default parallel PETSc format). For good matrix assembly performance 4166 the user should preallocate the matrix storage by setting the parameters 4167 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4168 performance can be increased by more than a factor of 50. 4169 4170 Collective 4171 4172 Input Parameters: 4173 + comm - MPI communicator 4174 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4175 This value should be the same as the local size used in creating the 4176 y vector for the matrix-vector product y = Ax. 4177 . n - This value should be the same as the local size used in creating the 4178 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4179 calculated if N is given) For square matrices n is almost always m. 4180 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4181 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4182 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4183 (same value is used for all local rows) 4184 . d_nnz - array containing the number of nonzeros in the various rows of the 4185 DIAGONAL portion of the local submatrix (possibly different for each row) 4186 or NULL, if d_nz is used to specify the nonzero structure. 4187 The size of this array is equal to the number of local rows, i.e 'm'. 4188 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4189 submatrix (same value is used for all local rows). 4190 - o_nnz - array containing the number of nonzeros in the various rows of the 4191 OFF-DIAGONAL portion of the local submatrix (possibly different for 4192 each row) or NULL, if o_nz is used to specify the nonzero 4193 structure. The size of this array is equal to the number 4194 of local rows, i.e 'm'. 4195 4196 Output Parameter: 4197 . A - the matrix 4198 4199 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4200 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4201 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4202 4203 Notes: 4204 If the *_nnz parameter is given then the *_nz parameter is ignored 4205 4206 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4207 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4208 storage requirements for this matrix. 4209 4210 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4211 processor than it must be used on all processors that share the object for 4212 that argument. 4213 4214 The user MUST specify either the local or global matrix dimensions 4215 (possibly both). 4216 4217 The parallel matrix is partitioned across processors such that the 4218 first m0 rows belong to process 0, the next m1 rows belong to 4219 process 1, the next m2 rows belong to process 2 etc.. where 4220 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4221 values corresponding to [m x N] submatrix. 4222 4223 The columns are logically partitioned with the n0 columns belonging 4224 to 0th partition, the next n1 columns belonging to the next 4225 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4226 4227 The DIAGONAL portion of the local submatrix on any given processor 4228 is the submatrix corresponding to the rows and columns m,n 4229 corresponding to the given processor. i.e diagonal matrix on 4230 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4231 etc. The remaining portion of the local submatrix [m x (N-n)] 4232 constitute the OFF-DIAGONAL portion. The example below better 4233 illustrates this concept. 4234 4235 For a square global matrix we define each processor's diagonal portion 4236 to be its local rows and the corresponding columns (a square submatrix); 4237 each processor's off-diagonal portion encompasses the remainder of the 4238 local matrix (a rectangular submatrix). 4239 4240 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4241 4242 When calling this routine with a single process communicator, a matrix of 4243 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this 4244 type of communicator, use the construction mechanism 4245 .vb 4246 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4247 .ve 4248 4249 $ MatCreate(...,&A); 4250 $ MatSetType(A,MATMPIAIJ); 4251 $ MatSetSizes(A, m,n,M,N); 4252 $ MatMPIAIJSetPreallocation(A,...); 4253 4254 By default, this format uses inodes (identical nodes) when possible. 4255 We search for consecutive rows with the same nonzero structure, thereby 4256 reusing matrix information to achieve increased efficiency. 4257 4258 Options Database Keys: 4259 + -mat_no_inode - Do not use inodes 4260 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4261 4262 4263 4264 Example usage: 4265 4266 Consider the following 8x8 matrix with 34 non-zero values, that is 4267 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4268 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4269 as follows 4270 4271 .vb 4272 1 2 0 | 0 3 0 | 0 4 4273 Proc0 0 5 6 | 7 0 0 | 8 0 4274 9 0 10 | 11 0 0 | 12 0 4275 ------------------------------------- 4276 13 0 14 | 15 16 17 | 0 0 4277 Proc1 0 18 0 | 19 20 21 | 0 0 4278 0 0 0 | 22 23 0 | 24 0 4279 ------------------------------------- 4280 Proc2 25 26 27 | 0 0 28 | 29 0 4281 30 0 0 | 31 32 33 | 0 34 4282 .ve 4283 4284 This can be represented as a collection of submatrices as 4285 4286 .vb 4287 A B C 4288 D E F 4289 G H I 4290 .ve 4291 4292 Where the submatrices A,B,C are owned by proc0, D,E,F are 4293 owned by proc1, G,H,I are owned by proc2. 4294 4295 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4296 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4297 The 'M','N' parameters are 8,8, and have the same values on all procs. 4298 4299 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4300 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4301 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4302 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4303 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4304 matrix, ans [DF] as another SeqAIJ matrix. 4305 4306 When d_nz, o_nz parameters are specified, d_nz storage elements are 4307 allocated for every row of the local diagonal submatrix, and o_nz 4308 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4309 One way to choose d_nz and o_nz is to use the max nonzerors per local 4310 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4311 In this case, the values of d_nz,o_nz are 4312 .vb 4313 proc0 : dnz = 2, o_nz = 2 4314 proc1 : dnz = 3, o_nz = 2 4315 proc2 : dnz = 1, o_nz = 4 4316 .ve 4317 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4318 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4319 for proc3. i.e we are using 12+15+10=37 storage locations to store 4320 34 values. 4321 4322 When d_nnz, o_nnz parameters are specified, the storage is specified 4323 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4324 In the above case the values for d_nnz,o_nnz are 4325 .vb 4326 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4327 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4328 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4329 .ve 4330 Here the space allocated is sum of all the above values i.e 34, and 4331 hence pre-allocation is perfect. 
4332 4333 Level: intermediate 4334 4335 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4336 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4337 @*/ 4338 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4339 { 4340 PetscErrorCode ierr; 4341 PetscMPIInt size; 4342 4343 PetscFunctionBegin; 4344 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4345 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4346 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4347 if (size > 1) { 4348 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4349 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4350 } else { 4351 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4352 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4353 } 4354 PetscFunctionReturn(0); 4355 } 4356 4357 /*@C 4358 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4359 4360 Not Collective 4361 4362 Input Parameter: 4363 . A - The MPIAIJ matrix 4364 4365 Output Parameters: 4366 + Ad - The local diagonal block as a SeqAIJ matrix 4367 . Ao - The local off-diagonal block as a SeqAIJ matrix 4368 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4369 4370 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4371 in Ad are in [0, Nc), where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is 4372 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4373 local column numbers to global column numbers in the original matrix.
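   A minimal usage sketch (the variable names are illustrative only):
.vb
     Mat            Ad,Ao;
     const PetscInt *colmap;
     MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);
.ve
   Ad, Ao, and colmap are owned by A; the caller should not destroy or free them.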
4374 4375 Level: intermediate 4376 4377 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4378 @*/ 4379 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4380 { 4381 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4382 PetscBool flg; 4383 PetscErrorCode ierr; 4384 4385 PetscFunctionBegin; 4386 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4387 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4388 if (Ad) *Ad = a->A; 4389 if (Ao) *Ao = a->B; 4390 if (colmap) *colmap = a->garray; 4391 PetscFunctionReturn(0); 4392 } 4393 4394 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4395 { 4396 PetscErrorCode ierr; 4397 PetscInt m,N,i,rstart,nnz,Ii; 4398 PetscInt *indx; 4399 PetscScalar *values; 4400 4401 PetscFunctionBegin; 4402 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4403 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4404 PetscInt *dnz,*onz,sum,bs,cbs; 4405 4406 if (n == PETSC_DECIDE) { 4407 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4408 } 4409 /* Check sum(n) = N */ 4410 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4411 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4412 4413 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4414 rstart -= m; 4415 4416 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4417 for (i=0; i<m; i++) { 4418 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4419 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4420 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4421 } 4422 4423 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4424 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4425 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4426 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4427 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4428 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4429 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4430 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4431 } 4432 4433 /* numeric phase */ 4434 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4435 for (i=0; i<m; i++) { 4436 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4437 Ii = i + rstart; 4438 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4439 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4440 } 4441 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4442 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4443 PetscFunctionReturn(0); 4444 } 4445 4446 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4447 { 4448 PetscErrorCode ierr; 4449 PetscMPIInt rank; 4450 PetscInt m,N,i,rstart,nnz; 4451 size_t len; 4452 const PetscInt *indx; 4453 PetscViewer out; 4454 char *name; 4455 Mat B; 4456 const PetscScalar *values; 4457 4458 PetscFunctionBegin; 4459 ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr); 4460 ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr); 4461 /* Should this be the type of the diagonal block of A? 
*/ 4462 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4463 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4464 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4465 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4466 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4467 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 4468 for (i=0; i<m; i++) { 4469 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4470 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4471 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4472 } 4473 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4474 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4475 4476 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4477 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4478 ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr); 4479 ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr); 4480 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4481 ierr = PetscFree(name);CHKERRQ(ierr); 4482 ierr = MatView(B,out);CHKERRQ(ierr); 4483 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4484 ierr = MatDestroy(&B);CHKERRQ(ierr); 4485 PetscFunctionReturn(0); 4486 } 4487 4488 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4489 { 4490 PetscErrorCode ierr; 4491 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4492 4493 PetscFunctionBegin; 4494 if (!merge) PetscFunctionReturn(0); 4495 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4507 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4508 ierr = PetscFree(merge);CHKERRQ(ierr); 4509 PetscFunctionReturn(0); 4510 } 4511 4512 #include <../src/mat/utils/freespace.h> 4513 #include <petscbt.h> 4514 4515 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4516 { 4517 PetscErrorCode ierr; 4518 MPI_Comm comm; 4519 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4520 PetscMPIInt size,rank,taga,*len_s; 4521 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4522 PetscInt proc,m; 4523 PetscInt **buf_ri,**buf_rj; 4524 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4525 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4526 MPI_Request *s_waits,*r_waits; 4527 MPI_Status *status; 4528 MatScalar *aa=a->a; 4529 MatScalar **abuf_r,*ba_i; 4530 Mat_Merge_SeqsToMPI *merge; 4531 PetscContainer container; 4532 4533 PetscFunctionBegin; 4534 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4535 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4536 4537 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4538 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4539 4540 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4541 if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4542 ierr = 
PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4543 4544 bi = merge->bi; 4545 bj = merge->bj; 4546 buf_ri = merge->buf_ri; 4547 buf_rj = merge->buf_rj; 4548 4549 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4550 owners = merge->rowmap->range; 4551 len_s = merge->len_s; 4552 4553 /* send and recv matrix values */ 4554 /*-----------------------------*/ 4555 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4556 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4557 4558 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4559 for (proc=0,k=0; proc<size; proc++) { 4560 if (!len_s[proc]) continue; 4561 i = owners[proc]; 4562 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4563 k++; 4564 } 4565 4566 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4567 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4568 ierr = PetscFree(status);CHKERRQ(ierr); 4569 4570 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4571 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4572 4573 /* insert mat values of mpimat */ 4574 /*----------------------------*/ 4575 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4576 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4577 4578 for (k=0; k<merge->nrecv; k++) { 4579 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4580 nrows = *(buf_ri_k[k]); 4581 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4582 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4583 } 4584 4585 /* set values of ba */ 4586 m = merge->rowmap->n; 4587 for (i=0; i<m; i++) { 4588 arow = owners[rank] + i; 4589 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4590 bnzi = bi[i+1] - bi[i]; 4591 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4592 4593 /* add local non-zero vals of this proc's seqmat into ba */ 4594 anzi = ai[arow+1] - ai[arow]; 4595 aj = a->j + ai[arow]; 4596 aa = a->a + ai[arow]; 4597 nextaj = 0; 4598 for (j=0; nextaj<anzi; j++) { 4599 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4600 ba_i[j] += aa[nextaj++]; 4601 } 4602 } 4603 4604 /* add received vals into ba */ 4605 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4606 /* i-th row */ 4607 if (i == *nextrow[k]) { 4608 anzi = *(nextai[k]+1) - *nextai[k]; 4609 aj = buf_rj[k] + *(nextai[k]); 4610 aa = abuf_r[k] + *(nextai[k]); 4611 nextaj = 0; 4612 for (j=0; nextaj<anzi; j++) { 4613 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4614 ba_i[j] += aa[nextaj++]; 4615 } 4616 } 4617 nextrow[k]++; nextai[k]++; 4618 } 4619 } 4620 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4621 } 4622 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4623 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4624 4625 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4626 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4627 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4628 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4629 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4630 PetscFunctionReturn(0); 4631 } 4632 4633 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4634 { 4635 PetscErrorCode ierr; 4636 Mat B_mpi; 4637 Mat_SeqAIJ 
*a=(Mat_SeqAIJ*)seqmat->data; 4638 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4639 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4640 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4641 PetscInt len,proc,*dnz,*onz,bs,cbs; 4642 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4643 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4644 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4645 MPI_Status *status; 4646 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4647 PetscBT lnkbt; 4648 Mat_Merge_SeqsToMPI *merge; 4649 PetscContainer container; 4650 4651 PetscFunctionBegin; 4652 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4653 4654 /* make sure it is a PETSc comm */ 4655 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4657 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4658 4659 ierr = PetscNew(&merge);CHKERRQ(ierr); 4660 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4661 4662 /* determine row ownership */ 4663 /*---------------------------------------------------------*/ 4664 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4665 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4666 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4667 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4669 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4670 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4671 4672 m = merge->rowmap->n; 4673 owners = merge->rowmap->range; 4674 4675 /* determine the number of messages to send, their lengths */ 4676 /*---------------------------------------------------------*/ 4677 len_s = merge->len_s; 4678 4679 len = 0; /* length of buf_si[] */ 4680 merge->nsend = 0; 4681 for (proc=0; proc<size; proc++) { 4682 len_si[proc] = 0; 4683 if (proc == rank) { 4684 len_s[proc] = 0; 4685 } else { 4686 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4687 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4688 } 4689 if (len_s[proc]) { 4690 merge->nsend++; 4691 nrows = 0; 4692 for (i=owners[proc]; i<owners[proc+1]; i++) { 4693 if (ai[i+1] > ai[i]) nrows++; 4694 } 4695 len_si[proc] = 2*(nrows+1); 4696 len += len_si[proc]; 4697 } 4698 } 4699 4700 /* determine the number and length of messages to receive for ij-structure */ 4701 /*-------------------------------------------------------------------------*/ 4702 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4703 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4704 4705 /* post the Irecv of j-structure */ 4706 /*-------------------------------*/ 4707 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4708 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4709 4710 /* post the Isend of j-structure */ 4711 /*--------------------------------*/ 4712 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4713 4714 for (proc=0, k=0; proc<size; proc++) { 4715 if (!len_s[proc]) continue; 4716 i = owners[proc]; 4717 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4718 k++; 4719 } 4720 4721 /* receives and sends of j-structure are complete */ 4722 /*------------------------------------------------*/ 
4723 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4724 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4725 4726 /* send and recv i-structure */ 4727 /*---------------------------*/ 4728 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4729 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4730 4731 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4732 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4733 for (proc=0,k=0; proc<size; proc++) { 4734 if (!len_s[proc]) continue; 4735 /* form outgoing message for i-structure: 4736 buf_si[0]: nrows to be sent 4737 [1:nrows]: row index (global) 4738 [nrows+1:2*nrows+1]: i-structure index 4739 */ 4740 /*-------------------------------------------*/ 4741 nrows = len_si[proc]/2 - 1; 4742 buf_si_i = buf_si + nrows+1; 4743 buf_si[0] = nrows; 4744 buf_si_i[0] = 0; 4745 nrows = 0; 4746 for (i=owners[proc]; i<owners[proc+1]; i++) { 4747 anzi = ai[i+1] - ai[i]; 4748 if (anzi) { 4749 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4750 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4751 nrows++; 4752 } 4753 } 4754 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4755 k++; 4756 buf_si += len_si[proc]; 4757 } 4758 4759 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4760 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4761 4762 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4763 for (i=0; i<merge->nrecv; i++) { 4764 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4765 } 4766 4767 ierr = PetscFree(len_si);CHKERRQ(ierr); 4768 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4769 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4770 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4771 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4772 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4773 ierr = PetscFree(status);CHKERRQ(ierr); 4774 4775 /* compute a local seq matrix in each processor */ 4776 /*----------------------------------------------*/ 4777 /* allocate bi array and free space for accumulating nonzero column info */ 4778 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4779 bi[0] = 0; 4780 4781 /* create and initialize a linked list */ 4782 nlnk = N+1; 4783 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4784 4785 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4786 len = ai[owners[rank+1]] - ai[owners[rank]]; 4787 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4788 4789 current_space = free_space; 4790 4791 /* determine symbolic info for each local row */ 4792 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4793 4794 for (k=0; k<merge->nrecv; k++) { 4795 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4796 nrows = *buf_ri_k[k]; 4797 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4798 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4799 } 4800 4801 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4802 len = 0; 4803 for (i=0; i<m; i++) { 4804 bnzi = 0; 4805 /* add local non-zero cols of this proc's seqmat into lnk */ 4806 arow = owners[rank] + i; 4807 anzi = 
ai[arow+1] - ai[arow]; 4808 aj = a->j + ai[arow]; 4809 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4810 bnzi += nlnk; 4811 /* add received col data into lnk */ 4812 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4813 if (i == *nextrow[k]) { /* i-th row */ 4814 anzi = *(nextai[k]+1) - *nextai[k]; 4815 aj = buf_rj[k] + *nextai[k]; 4816 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4817 bnzi += nlnk; 4818 nextrow[k]++; nextai[k]++; 4819 } 4820 } 4821 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4822 4823 /* if free space is not available, make more free space */ 4824 if (current_space->local_remaining<bnzi) { 4825 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4826 nspacedouble++; 4827 } 4828 /* copy data into free space, then initialize lnk */ 4829 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4830 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4831 4832 current_space->array += bnzi; 4833 current_space->local_used += bnzi; 4834 current_space->local_remaining -= bnzi; 4835 4836 bi[i+1] = bi[i] + bnzi; 4837 } 4838 4839 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4840 4841 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4842 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4843 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4844 4845 /* create symbolic parallel matrix B_mpi */ 4846 /*---------------------------------------*/ 4847 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4848 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4849 if (n==PETSC_DECIDE) { 4850 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4851 } else { 4852 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4853 } 4854 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4855 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4856 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4857 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4858 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4859 4860 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4861 B_mpi->assembled = PETSC_FALSE; 4862 merge->bi = bi; 4863 merge->bj = bj; 4864 merge->buf_ri = buf_ri; 4865 merge->buf_rj = buf_rj; 4866 merge->coi = NULL; 4867 merge->coj = NULL; 4868 merge->owners_co = NULL; 4869 4870 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4871 4872 /* attach the supporting struct to B_mpi for reuse */ 4873 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4874 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4875 ierr = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr); 4876 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4877 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4878 *mpimat = B_mpi; 4879 4880 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4881 PetscFunctionReturn(0); 4882 } 4883 4884 /*@C 4885 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4886 matrices from each processor 4887 4888 Collective 4889 4890 Input Parameters: 4891 + comm - the communicator the parallel matrix will live on 4892 . seqmat - the input sequential matrix on each process 4893 . m - number of local rows (or PETSC_DECIDE) 4894 .
n - number of local columns (or PETSC_DECIDE) 4895 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4896 4897 Output Parameter: 4898 . mpimat - the parallel matrix generated 4899 4900 Level: advanced 4901 4902 Notes: 4903 The dimensions of the sequential matrix in each processor MUST be the same. 4904 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4905 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4906 @*/ 4907 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4908 { 4909 PetscErrorCode ierr; 4910 PetscMPIInt size; 4911 4912 PetscFunctionBegin; 4913 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4914 if (size == 1) { 4915 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4916 if (scall == MAT_INITIAL_MATRIX) { 4917 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4918 } else { 4919 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4920 } 4921 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4922 PetscFunctionReturn(0); 4923 } 4924 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 if (scall == MAT_INITIAL_MATRIX) { 4926 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4927 } 4928 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4929 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4930 PetscFunctionReturn(0); 4931 } 4932 4933 /*@ 4934 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4935 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4936 with MatGetSize() 4937 4938 Not Collective 4939 4940 Input Parameters: 4941 + A - the matrix 4942 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4943 4944 Output Parameter: 4945 . A_loc - the local sequential matrix generated 4946 4947 Level: developer 4948 4949 Notes: 4950 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4951 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4952 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4953 modify the values of the returned A_loc.
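   A typical call sequence (a sketch only, with error checking omitted) is
.vb
     Mat A_loc;
     MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);   /* gather the local rows of A into a SeqAIJ matrix */
     ...                                                  /* the values of A change                          */
     MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);     /* refresh the values without reallocating         */
     MatDestroy(&A_loc);
.ve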
4954 4955 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4956 4957 @*/ 4958 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4959 { 4960 PetscErrorCode ierr; 4961 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4962 Mat_SeqAIJ *mat,*a,*b; 4963 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4964 MatScalar *aa,*ba,*cam; 4965 PetscScalar *ca; 4966 PetscMPIInt size; 4967 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4968 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4969 PetscBool match; 4970 4971 PetscFunctionBegin; 4972 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4973 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4974 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4975 if (size == 1) { 4976 if (scall == MAT_INITIAL_MATRIX) { 4977 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4978 *A_loc = mpimat->A; 4979 } else if (scall == MAT_REUSE_MATRIX) { 4980 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4981 } 4982 PetscFunctionReturn(0); 4983 } 4984 4985 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4986 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4987 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4988 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4989 aa = a->a; ba = b->a; 4990 if (scall == MAT_INITIAL_MATRIX) { 4991 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4992 ci[0] = 0; 4993 for (i=0; i<am; i++) { 4994 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4995 } 4996 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4997 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4998 k = 0; 4999 for (i=0; i<am; i++) { 5000 ncols_o = bi[i+1] - bi[i]; 5001 ncols_d = ai[i+1] - ai[i]; 5002 /* off-diagonal portion of A */ 5003 for (jo=0; jo<ncols_o; jo++) { 5004 col = cmap[*bj]; 5005 if (col >= cstart) break; 5006 cj[k] = col; bj++; 5007 ca[k++] = *ba++; 5008 } 5009 /* diagonal portion of A */ 5010 for (j=0; j<ncols_d; j++) { 5011 cj[k] = cstart + *aj++; 5012 ca[k++] = *aa++; 5013 } 5014 /* off-diagonal portion of A */ 5015 for (j=jo; j<ncols_o; j++) { 5016 cj[k] = cmap[*bj++]; 5017 ca[k++] = *ba++; 5018 } 5019 } 5020 /* put together the new matrix */ 5021 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5022 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5023 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5024 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5025 mat->free_a = PETSC_TRUE; 5026 mat->free_ij = PETSC_TRUE; 5027 mat->nonew = 0; 5028 } else if (scall == MAT_REUSE_MATRIX) { 5029 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5030 ci = mat->i; cj = mat->j; cam = mat->a; 5031 for (i=0; i<am; i++) { 5032 /* off-diagonal portion of A */ 5033 ncols_o = bi[i+1] - bi[i]; 5034 for (jo=0; jo<ncols_o; jo++) { 5035 col = cmap[*bj]; 5036 if (col >= cstart) break; 5037 *cam++ = *ba++; bj++; 5038 } 5039 /* diagonal portion of A */ 5040 ncols_d = ai[i+1] - ai[i]; 5041 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5042 /* off-diagonal portion of A */ 5043 for (j=jo; j<ncols_o; j++) { 5044 *cam++ = *ba++; bj++; 5045 } 5046 } 5047 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5048 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5049 PetscFunctionReturn(0); 5050 } 5051 5052 /*@C 5053 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5054 5055 Not Collective 5056 5057 Input Parameters: 5058 + A - the matrix 5059 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5060 - row, col - index sets of rows and columns to extract (or NULL) 5061 5062 Output Parameter: 5063 . A_loc - the local sequential matrix generated 5064 5065 Level: developer 5066 5067 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5068 5069 @*/ 5070 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5071 { 5072 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5073 PetscErrorCode ierr; 5074 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5075 IS isrowa,iscola; 5076 Mat *aloc; 5077 PetscBool match; 5078 5079 PetscFunctionBegin; 5080 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5081 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5082 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5083 if (!row) { 5084 start = A->rmap->rstart; end = A->rmap->rend; 5085 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5086 } else { 5087 isrowa = *row; 5088 } 5089 if (!col) { 5090 start = A->cmap->rstart; 5091 cmap = a->garray; 5092 nzA = a->A->cmap->n; 5093 nzB = a->B->cmap->n; 5094 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5095 ncols = 0; 5096 for (i=0; i<nzB; i++) { 5097 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5098 else break; 5099 } 5100 imark = i; 5101 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5102 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5103 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5104 } else { 5105 iscola = *col; 5106 } 5107 if (scall != MAT_INITIAL_MATRIX) { 5108 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5109 aloc[0] = *A_loc; 5110 } 5111 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5112 if (!col) { /* attach global id of condensed columns */ 5113 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5114 } 5115 *A_loc = aloc[0]; 5116 ierr = PetscFree(aloc);CHKERRQ(ierr); 5117 if (!row) { 5118 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5119 } 5120 if (!col) { 5121 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5122 } 5123 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5124 PetscFunctionReturn(0); 5125 } 5126 5127 /* 5128 * Create a sequential AIJ matrix 
based on row indices; a whole row (all of its columns) is extracted once a row index is matched. 5129 * A row could be local or remote. The routine is designed to be scalable in memory so that nothing is based 5130 * on a global size. 5131 * */ 5132 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5133 { 5134 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5135 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5136 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5137 PetscMPIInt owner; 5138 PetscSFNode *iremote,*oiremote; 5139 const PetscInt *lrowindices; 5140 PetscErrorCode ierr; 5141 PetscSF sf,osf; 5142 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5143 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5144 MPI_Comm comm; 5145 ISLocalToGlobalMapping mapping; 5146 5147 PetscFunctionBegin; 5148 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5149 /* plocalsize is the number of roots 5150 * nrows is the number of leaves 5151 * */ 5152 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5153 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5154 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5155 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5156 for (i=0;i<nrows;i++) { 5157 /* Find a remote index and an owner for a row 5158 * The row could be local or remote 5159 * */ 5160 owner = 0; 5161 lidx = 0; 5162 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5163 iremote[i].index = lidx; 5164 iremote[i].rank = owner; 5165 } 5166 /* Create SF to communicate how many nonzero columns for each row */ 5167 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5168 /* SF will figure out the number of nonzero columns for each row, and their 5169 * offsets 5170 * */ 5171 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5172 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5173 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5174 5175 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5176 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5177 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5178 roffsets[0] = 0; 5179 roffsets[1] = 0; 5180 for (i=0;i<plocalsize;i++) { 5181 /* diag */ 5182 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5183 /* off diag */ 5184 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5185 /* compute offsets so that we know the relative location of each row */ 5186 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5187 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5188 } 5189 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5190 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5191 /* 'r' means root, and 'l' means leaf */ 5192 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5193 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5194 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5195 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5196 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5197 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5198 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5199 dntotalcols = 0; 5200 ontotalcols = 0; 5201 ncol = 0; 5202 for (i=0;i<nrows;i++) { 5203 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5204 ncol = PetscMax(pnnz[i],ncol); 5205 /* diag */ 5206 dntotalcols += nlcols[i*2+0]; 5207 /* off diag */ 5208 ontotalcols += nlcols[i*2+1]; 5209 } 5210 /* We do not need to figure out the right number of columns 5211 * since all the
calculations will be done by going through the raw data 5212 * */ 5213 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5214 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5215 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5216 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5217 /* diag */ 5218 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5219 /* off diag */ 5220 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5221 /* diag */ 5222 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5223 /* off diag */ 5224 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5225 dntotalcols = 0; 5226 ontotalcols = 0; 5227 ntotalcols = 0; 5228 for (i=0;i<nrows;i++) { 5229 owner = 0; 5230 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5231 /* Set iremote for diag matrix */ 5232 for (j=0;j<nlcols[i*2+0];j++) { 5233 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5234 iremote[dntotalcols].rank = owner; 5235 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5236 ilocal[dntotalcols++] = ntotalcols++; 5237 } 5238 /* off diag */ 5239 for (j=0;j<nlcols[i*2+1];j++) { 5240 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5241 oiremote[ontotalcols].rank = owner; 5242 oilocal[ontotalcols++] = ntotalcols++; 5243 } 5244 } 5245 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5246 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5247 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5248 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5249 /* P serves as roots and P_oth is leaves 5250 * Diag matrix 5251 * */ 5252 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5253 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5254 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5255 5256 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5257 /* Off diag */ 5258 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5259 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5260 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5261 /* We operate on the matrix internal data for saving memory */ 5262 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5263 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5264 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5265 /* Convert to global indices for diag matrix */ 5266 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5267 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5268 /* We want P_oth store global indices */ 5269 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5270 /* Use memory scalable approach */ 5271 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5272 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5273 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5274 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5275 /* Convert back to local indices */ 5276 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5277 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5278 nout = 0; 5279 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5280 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D 
\n",po->i[plocalsize],nout); 5281 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5282 /* Exchange values */ 5283 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5284 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5285 /* Stop PETSc from shrinking memory */ 5286 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5287 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5288 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5289 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5290 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5291 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5292 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5293 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5294 PetscFunctionReturn(0); 5295 } 5296 5297 /* 5298 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5299 * This supports MPIAIJ and MAIJ 5300 * */ 5301 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5302 { 5303 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5304 Mat_SeqAIJ *p_oth; 5305 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5306 IS rows,map; 5307 PetscHMapI hamp; 5308 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5309 MPI_Comm comm; 5310 PetscSF sf,osf; 5311 PetscBool has; 5312 PetscErrorCode ierr; 5313 5314 PetscFunctionBegin; 5315 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5316 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5317 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5318 * and then create a submatrix (that often is an overlapping matrix) 5319 * */ 5320 if (reuse == MAT_INITIAL_MATRIX) { 5321 /* Use a hash table to figure out unique keys */ 5322 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5323 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5324 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5325 count = 0; 5326 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5327 for (i=0;i<a->B->cmap->n;i++) { 5328 key = a->garray[i]/dof; 5329 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5330 if (!has) { 5331 mapping[i] = count; 5332 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5333 } else { 5334 /* Current 'i' has the same value the previous step */ 5335 mapping[i] = count-1; 5336 } 5337 } 5338 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5339 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5340 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5341 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5342 off = 0; 5343 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5344 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5345 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5346 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5347 /* In case, the matrix was already created but users want to recreate the matrix */ 5348 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5349 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5350 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5351 ierr = 
ISDestroy(&map);CHKERRQ(ierr); 5352 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5353 } else if (reuse == MAT_REUSE_MATRIX) { 5354 /* If the matrix was already created, we simply update the values using the SF objects 5355 * that were attached to the matrix earlier. 5356 * */ 5357 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5358 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5359 if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet"); 5360 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5361 /* Update values in place */ 5362 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5363 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5364 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5365 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5366 } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type"); 5367 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5368 PetscFunctionReturn(0); 5369 } 5370 5371 /*@C 5372 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A 5373 5374 Collective on Mat 5375 5376 Input Parameters: 5377 + A,B - the matrices in mpiaij format 5378 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5379 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5380 5381 Output Parameters: 5382 + rowb, colb - index sets of rows and columns of B to extract 5383 - B_seq - the sequential matrix generated 5384 5385 Level: developer 5386 5387 @*/ 5388 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5389 { 5390 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5391 PetscErrorCode ierr; 5392 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5393 IS isrowb,iscolb; 5394 Mat *bseq=NULL; 5395 5396 PetscFunctionBegin; 5397 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5398 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5399 } 5400 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5401 5402 if (scall == MAT_INITIAL_MATRIX) { 5403 start = A->cmap->rstart; 5404 cmap = a->garray; 5405 nzA = a->A->cmap->n; 5406 nzB = a->B->cmap->n; 5407 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5408 ncols = 0; 5409 for (i=0; i<nzB; i++) { /* row < local row index */ 5410 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5411 else break; 5412 } 5413 imark = i; 5414 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5415 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5416 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5417 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5418 } else { 5419 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5420 isrowb = *rowb; iscolb = *colb; 5421 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5422 bseq[0] = *B_seq; 5423 } 5424 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5425 *B_seq = bseq[0]; 5426 ierr = PetscFree(bseq);CHKERRQ(ierr); 5427 if (!rowb) { 5428 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5429 } else { 5430 *rowb = isrowb; 5431 } 5432 if (!colb) { 5433 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5434 }
else { 5435 *colb = iscolb; 5436 } 5437 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5438 PetscFunctionReturn(0); 5439 } 5440 5441 /* 5442 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5443 of the OFF-DIAGONAL portion of local A 5444 5445 Collective on Mat 5446 5447 Input Parameters: 5448 + A,B - the matrices in mpiaij format 5449 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5450 5451 Output Parameter: 5452 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5453 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5454 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5455 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5456 5457 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5458 for this matrix. This is not desirable.. 5459 5460 Level: developer 5461 5462 */ 5463 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5464 { 5465 PetscErrorCode ierr; 5466 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5467 Mat_SeqAIJ *b_oth; 5468 VecScatter ctx; 5469 MPI_Comm comm; 5470 const PetscMPIInt *rprocs,*sprocs; 5471 const PetscInt *srow,*rstarts,*sstarts; 5472 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5473 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len; 5474 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5475 MPI_Request *rwaits = NULL,*swaits = NULL; 5476 MPI_Status rstatus; 5477 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5478 5479 PetscFunctionBegin; 5480 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5481 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5482 5483 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5484 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5485 } 5486 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5487 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5488 5489 if (size == 1) { 5490 startsj_s = NULL; 5491 bufa_ptr = NULL; 5492 *B_oth = NULL; 5493 PetscFunctionReturn(0); 5494 } 5495 5496 ctx = a->Mvctx; 5497 tag = ((PetscObject)ctx)->tag; 5498 5499 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5500 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5501 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5502 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5503 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5504 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5505 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5506 5507 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5508 if (scall == MAT_INITIAL_MATRIX) { 5509 /* i-array */ 5510 /*---------*/ 5511 /* post receives */ 5512 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be 
NULL when nrecvs=0 */ 5513 for (i=0; i<nrecvs; i++) { 5514 rowlen = rvalues + rstarts[i]*rbs; 5515 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5516 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5517 } 5518 5519 /* pack the outgoing message */ 5520 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5521 5522 sstartsj[0] = 0; 5523 rstartsj[0] = 0; 5524 len = 0; /* total length of j or a array to be sent */ 5525 if (nsends) { 5526 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5527 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5528 } 5529 for (i=0; i<nsends; i++) { 5530 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5531 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5532 for (j=0; j<nrows; j++) { 5533 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5534 for (l=0; l<sbs; l++) { 5535 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5536 5537 rowlen[j*sbs+l] = ncols; 5538 5539 len += ncols; 5540 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5541 } 5542 k++; 5543 } 5544 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5545 5546 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5547 } 5548 /* recvs and sends of i-array are completed */ 5549 i = nrecvs; 5550 while (i--) { 5551 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5552 } 5553 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5554 ierr = PetscFree(svalues);CHKERRQ(ierr); 5555 5556 /* allocate buffers for sending j and a arrays */ 5557 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5558 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5559 5560 /* create i-array of B_oth */ 5561 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5562 5563 b_othi[0] = 0; 5564 len = 0; /* total length of j or a array to be received */ 5565 k = 0; 5566 for (i=0; i<nrecvs; i++) { 5567 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5568 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5569 for (j=0; j<nrows; j++) { 5570 b_othi[k+1] = b_othi[k] + rowlen[j]; 5571 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5572 k++; 5573 } 5574 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5575 } 5576 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5577 5578 /* allocate space for j and a arrrays of B_oth */ 5579 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5580 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5581 5582 /* j-array */ 5583 /*---------*/ 5584 /* post receives of j-array */ 5585 for (i=0; i<nrecvs; i++) { 5586 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5587 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5588 } 5589 5590 /* pack the outgoing message j-array */ 5591 if (nsends) k = sstarts[0]; 5592 for (i=0; i<nsends; i++) { 5593 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5594 bufJ = bufj+sstartsj[i]; 5595 for (j=0; j<nrows; j++) { 5596 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5597 for (ll=0; ll<sbs; ll++) { 5598 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5599 for (l=0; l<ncols; l++) { 5600 *bufJ++ = cols[l]; 5601 } 5602 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 
5603 } 5604 } 5605 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5606 } 5607 5608 /* recvs and sends of j-array are completed */ 5609 i = nrecvs; 5610 while (i--) { 5611 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5612 } 5613 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5614 } else if (scall == MAT_REUSE_MATRIX) { 5615 sstartsj = *startsj_s; 5616 rstartsj = *startsj_r; 5617 bufa = *bufa_ptr; 5618 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5619 b_otha = b_oth->a; 5620 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5621 5622 /* a-array */ 5623 /*---------*/ 5624 /* post receives of a-array */ 5625 for (i=0; i<nrecvs; i++) { 5626 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5627 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5628 } 5629 5630 /* pack the outgoing message a-array */ 5631 if (nsends) k = sstarts[0]; 5632 for (i=0; i<nsends; i++) { 5633 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5634 bufA = bufa+sstartsj[i]; 5635 for (j=0; j<nrows; j++) { 5636 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5637 for (ll=0; ll<sbs; ll++) { 5638 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5639 for (l=0; l<ncols; l++) { 5640 *bufA++ = vals[l]; 5641 } 5642 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5643 } 5644 } 5645 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5646 } 5647 /* recvs and sends of a-array are completed */ 5648 i = nrecvs; 5649 while (i--) { 5650 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5651 } 5652 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5653 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5654 5655 if (scall == MAT_INITIAL_MATRIX) { 5656 /* put together the new matrix */ 5657 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5658 5659 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5660 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5661 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5662 b_oth->free_a = PETSC_TRUE; 5663 b_oth->free_ij = PETSC_TRUE; 5664 b_oth->nonew = 0; 5665 5666 ierr = PetscFree(bufj);CHKERRQ(ierr); 5667 if (!startsj_s || !bufa_ptr) { 5668 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5669 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5670 } else { 5671 *startsj_s = sstartsj; 5672 *startsj_r = rstartsj; 5673 *bufa_ptr = bufa; 5674 } 5675 } 5676 5677 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5678 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5679 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5680 PetscFunctionReturn(0); 5681 } 5682 5683 /*@C 5684 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5685 5686 Not Collective 5687 5688 Input Parameters: 5689 . A - The matrix in mpiaij format 5690 5691 Output Parameter: 5692 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5693 . 
colmap - A map from global column index to local index into lvec 5694 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5695 5696 Level: developer 5697 5698 @*/ 5699 #if defined(PETSC_USE_CTABLE) 5700 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5701 #else 5702 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5703 #endif 5704 { 5705 Mat_MPIAIJ *a; 5706 5707 PetscFunctionBegin; 5708 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5709 PetscValidPointer(lvec, 2); 5710 PetscValidPointer(colmap, 3); 5711 PetscValidPointer(multScatter, 4); 5712 a = (Mat_MPIAIJ*) A->data; 5713 if (lvec) *lvec = a->lvec; 5714 if (colmap) *colmap = a->colmap; 5715 if (multScatter) *multScatter = a->Mvctx; 5716 PetscFunctionReturn(0); 5717 } 5718 5719 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5720 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5721 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5722 #if defined(PETSC_HAVE_MKL_SPARSE) 5723 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5724 #endif 5725 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*); 5726 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5727 #if defined(PETSC_HAVE_ELEMENTAL) 5728 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5729 #endif 5730 #if defined(PETSC_HAVE_SCALAPACK) 5731 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*); 5732 #endif 5733 #if defined(PETSC_HAVE_HYPRE) 5734 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5735 #endif 5736 #if defined(PETSC_HAVE_CUDA) 5737 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*); 5738 #endif 5739 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5740 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5741 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5742 5743 /* 5744 Computes (B'*A')' since computing B*A directly is untenable 5745 5746 n p p 5747 [ ] [ ] [ ] 5748 m [ A ] * n [ B ] = m [ C ] 5749 [ ] [ ] [ ] 5750 5751 */ 5752 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5753 { 5754 PetscErrorCode ierr; 5755 Mat At,Bt,Ct; 5756 5757 PetscFunctionBegin; 5758 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5759 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5760 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr); 5761 ierr = MatDestroy(&At);CHKERRQ(ierr); 5762 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5763 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5764 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5765 PetscFunctionReturn(0); 5766 } 5767 5768 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C) 5769 { 5770 PetscErrorCode ierr; 5771 PetscBool cisdense; 5772 5773 PetscFunctionBegin; 5774 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5775 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr); 5776 ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr); 5777 ierr = 
PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 5778 if (!cisdense) { 5779 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 5780 } 5781 ierr = MatSetUp(C);CHKERRQ(ierr); 5782 5783 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5784 PetscFunctionReturn(0); 5785 } 5786 5787 /* ----------------------------------------------------------------*/ 5788 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 5789 { 5790 Mat_Product *product = C->product; 5791 Mat A = product->A,B=product->B; 5792 5793 PetscFunctionBegin; 5794 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 5795 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5796 5797 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 5798 C->ops->productsymbolic = MatProductSymbolic_AB; 5799 PetscFunctionReturn(0); 5800 } 5801 5802 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 5803 { 5804 PetscErrorCode ierr; 5805 Mat_Product *product = C->product; 5806 5807 PetscFunctionBegin; 5808 if (product->type == MATPRODUCT_AB) { 5809 ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr); 5810 } 5811 PetscFunctionReturn(0); 5812 } 5813 /* ----------------------------------------------------------------*/ 5814 5815 /*MC 5816 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5817 5818 Options Database Keys: 5819 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5820 5821 Level: beginner 5822 5823 Notes: 5824 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values; 5825 in this case the values associated with the rows and columns one passes in are set to zero 5826 in the matrix. 5827 5828 MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this no 5829 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 5830 5831 .seealso: MatCreateAIJ() 5832 M*/ 5833 5834 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5835 { 5836 Mat_MPIAIJ *b; 5837 PetscErrorCode ierr; 5838 PetscMPIInt size; 5839 5840 PetscFunctionBegin; 5841 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5842 5843 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5844 B->data = (void*)b; 5845 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5846 B->assembled = PETSC_FALSE; 5847 B->insertmode = NOT_SET_VALUES; 5848 b->size = size; 5849 5850 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5851 5852 /* build cache for off array entries formed */ 5853 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5854 5855 b->donotstash = PETSC_FALSE; 5856 b->colmap = NULL; 5857 b->garray = NULL; 5858 b->roworiented = PETSC_TRUE; 5859 5860 /* stuff used for matrix vector multiply */ 5861 b->lvec = NULL; 5862 b->Mvctx = NULL; 5863 5864 /* stuff for MatGetRow() */ 5865 b->rowindices = NULL; 5866 b->rowvalues = NULL; 5867 b->getrowactive = PETSC_FALSE; 5868 5869 /* flexible pointer used in CUSP/CUSPARSE classes */ 5870 b->spptr = NULL; 5871 5872 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5873 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5874 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5875 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5876 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5877 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5878 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5879 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5880 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5881 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5882 #if defined(PETSC_HAVE_MKL_SPARSE) 5883 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5884 #endif 5885 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5886 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr); 5887 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5888 #if defined(PETSC_HAVE_ELEMENTAL) 5889 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5890 #endif 5891 #if defined(PETSC_HAVE_SCALAPACK) 5892 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr); 5893 #endif 5894 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5895 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5896 #if defined(PETSC_HAVE_HYPRE) 5897 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5898 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5899 #endif 5900 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr); 5901 ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr); 5902 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5903 PetscFunctionReturn(0); 5904 } 5905 5906 /*@C 5907 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5908 and "off-diagonal" part of the matrix in CSR format. 5909 5910 Collective 5911 5912 Input Parameters: 5913 + comm - MPI communicator 5914 . m - number of local rows (Cannot be PETSC_DECIDE) 5915 . n - This value should be the same as the local size used in creating the 5916 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5917 calculated if N is given) For square matrices n is almost always m. 5918 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5919 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5920 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5921 . j - column indices 5922 . a - matrix values 5923 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5924 . oj - column indices 5925 - oa - matrix values 5926 5927 Output Parameter: 5928 . mat - the matrix 5929 5930 Level: advanced 5931 5932 Notes: 5933 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5934 must free the arrays once the matrix has been destroyed and not before. 5935 5936 The i and j indices are 0 based 5937 5938 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5939 5940 This sets local rows and cannot be used to set off-processor values. 5941 5942 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5943 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5944 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5945 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5946 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5947 communication if it is known that only local entries will be set. 
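
   Example usage:
   The sketch below is purely illustrative (hypothetical sizes and values, not taken from this file); it shows
   the arrays one of two processes, each owning two rows and two columns of a 4 x 4 matrix, might pass. Here
   i/j/a give the two local rows of the "diagonal" block in CSR form with local column indices, while oi/oj/oa
   give the same rows of the "off-diagonal" block with global column indices, and ierr is assumed to be a
   previously declared PetscErrorCode.
.vb
      Mat         A;
      PetscInt    i[]  = {0,1,2},   j[]  = {0,1},  oi[] = {0,1,1},  oj[] = {2};
      PetscScalar a[]  = {1.0,2.0}, oa[] = {0.5};

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
.ve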
5948 5949 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5950 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5951 @*/ 5952 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5953 { 5954 PetscErrorCode ierr; 5955 Mat_MPIAIJ *maij; 5956 5957 PetscFunctionBegin; 5958 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5959 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5960 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5961 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5962 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5963 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5964 maij = (Mat_MPIAIJ*) (*mat)->data; 5965 5966 (*mat)->preallocated = PETSC_TRUE; 5967 5968 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5969 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5970 5971 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5972 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5973 5974 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5975 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5976 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5977 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5978 5979 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5980 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5981 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5982 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5983 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5984 PetscFunctionReturn(0); 5985 } 5986 5987 /* 5988 Special version for direct calls from Fortran 5989 */ 5990 #include <petsc/private/fortranimpl.h> 5991 5992 /* Change these macros so can be used in void function */ 5993 #undef CHKERRQ 5994 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5995 #undef SETERRQ2 5996 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5997 #undef SETERRQ3 5998 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5999 #undef SETERRQ 6000 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6001 6002 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6003 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6004 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6005 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6006 #else 6007 #endif 6008 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6009 { 6010 Mat mat = *mmat; 6011 PetscInt m = *mm, n = *mn; 6012 InsertMode addv = *maddv; 6013 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6014 PetscScalar value; 6015 PetscErrorCode ierr; 6016 6017 MatCheckPreallocated(mat,1); 6018 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6019 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6020 { 6021 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6022 PetscInt cstart = 
mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6023 PetscBool roworiented = aij->roworiented; 6024 6025 /* Some Variables required in the macro */ 6026 Mat A = aij->A; 6027 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6028 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6029 MatScalar *aa = a->a; 6030 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6031 Mat B = aij->B; 6032 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6033 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6034 MatScalar *ba = b->a; 6035 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 6036 * cannot use "#if defined" inside a macro. */ 6037 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 6038 6039 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6040 PetscInt nonew = a->nonew; 6041 MatScalar *ap1,*ap2; 6042 6043 PetscFunctionBegin; 6044 for (i=0; i<m; i++) { 6045 if (im[i] < 0) continue; 6046 if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6047 if (im[i] >= rstart && im[i] < rend) { 6048 row = im[i] - rstart; 6049 lastcol1 = -1; 6050 rp1 = aj + ai[row]; 6051 ap1 = aa + ai[row]; 6052 rmax1 = aimax[row]; 6053 nrow1 = ailen[row]; 6054 low1 = 0; 6055 high1 = nrow1; 6056 lastcol2 = -1; 6057 rp2 = bj + bi[row]; 6058 ap2 = ba + bi[row]; 6059 rmax2 = bimax[row]; 6060 nrow2 = bilen[row]; 6061 low2 = 0; 6062 high2 = nrow2; 6063 6064 for (j=0; j<n; j++) { 6065 if (roworiented) value = v[i*n+j]; 6066 else value = v[i+j*m]; 6067 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 6068 if (in[j] >= cstart && in[j] < cend) { 6069 col = in[j] - cstart; 6070 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6071 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6072 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 6073 #endif 6074 } else if (in[j] < 0) continue; 6075 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 6076 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6077 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 6078 } else { 6079 if (mat->was_assembled) { 6080 if (!aij->colmap) { 6081 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6082 } 6083 #if defined(PETSC_USE_CTABLE) 6084 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6085 col--; 6086 #else 6087 col = aij->colmap[in[j]] - 1; 6088 #endif 6089 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6090 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6091 col = in[j]; 6092 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6093 B = aij->B; 6094 b = (Mat_SeqAIJ*)B->data; 6095 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6096 rp2 = bj + bi[row]; 6097 ap2 = ba + bi[row]; 6098 rmax2 = bimax[row]; 6099 nrow2 = bilen[row]; 6100 low2 = 0; 6101 high2 = nrow2; 6102 bm = aij->B->rmap->n; 6103 ba = b->a; 6104 inserted = PETSC_FALSE; 6105 } 6106 } else col = in[j]; 6107 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6108 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 6109 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) 
B->offloadmask = PETSC_OFFLOAD_CPU; 6110 #endif 6111 } 6112 } 6113 } else if (!aij->donotstash) { 6114 if (roworiented) { 6115 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6116 } else { 6117 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6118 } 6119 } 6120 } 6121 } 6122 PetscFunctionReturnVoid(); 6123 } 6124
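
/*
   For reference only -- a minimal, hypothetical usage sketch that is not part of this file's implementation.
   It shows the ordinary MatSetValues() assembly path for a MATMPIAIJ matrix, i.e. the generic route that the
   Fortran fast path matsetvaluesmpiaij_() above specializes. The global size N and the tridiagonal stencil are
   illustrative choices, not anything prescribed by this source.

      Mat            A;
      PetscInt       Istart,Iend,row,col,N = 100;
      PetscScalar    v;
      PetscErrorCode ierr;

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
      ierr = MatMPIAIJSetPreallocation(A,3,NULL,2,NULL);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
      for (row=Istart; row<Iend; row++) {
        v = 2.0;  ierr = MatSetValues(A,1,&row,1,&row,&v,INSERT_VALUES);CHKERRQ(ierr);
        if (row > 0)   {col = row-1; v = -1.0; ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
        if (row < N-1) {col = row+1; v = -1.0; ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);}
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);
*/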