#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the type also automatically
    switches over to use inodes when enough exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) {
    ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
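  /*
     Note on the copies below: ld[i] was computed (and stashed in the Mat_MPIAIJ) during the
     MAT_INITIAL_MATRIX call as the number of entries of local row i whose global column is
     smaller than rstart, i.e. the entries of the off-diagonal block B that lie to the left of
     the diagonal block. Because the global SeqAIJ rows are stored with sorted columns, each
     received row in gmataa is laid out as
         [ ld[i] entries of B | all entries of A | remaining entries of B ]
     and that is the pattern used to split the values into ao and ad below.
  */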
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 inserted = PETSC_TRUE; \ 468 goto a_noinsert; \ 469 } \ 470 } \ 471 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 472 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 473 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 474 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 475 N = nrow1++ - 1; a->nz++; high1++; \ 476 /* shift up all the later entries in this row */ \ 477 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 478 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 479 rp1[_i] = col; 
\ 480 ap1[_i] = value; \ 481 A->nonzerostate++;\ 482 a_noinsert: ; \ 483 ailen[row] = nrow1; \ 484 } 485 486 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 487 { \ 488 if (col <= lastcol2) low2 = 0; \ 489 else high2 = nrow2; \ 490 lastcol2 = col; \ 491 while (high2-low2 > 5) { \ 492 t = (low2+high2)/2; \ 493 if (rp2[t] > col) high2 = t; \ 494 else low2 = t; \ 495 } \ 496 for (_i=low2; _i<high2; _i++) { \ 497 if (rp2[_i] > col) break; \ 498 if (rp2[_i] == col) { \ 499 if (addv == ADD_VALUES) { \ 500 ap2[_i] += value; \ 501 (void)PetscLogFlops(1.0); \ 502 } \ 503 else ap2[_i] = value; \ 504 inserted = PETSC_TRUE; \ 505 goto b_noinsert; \ 506 } \ 507 } \ 508 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 509 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 510 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 511 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 512 N = nrow2++ - 1; b->nz++; high2++; \ 513 /* shift up all the later entries in this row */ \ 514 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 515 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 516 rp2[_i] = col; \ 517 ap2[_i] = value; \ 518 B->nonzerostate++; \ 519 b_noinsert: ; \ 520 bilen[row] = nrow2; \ 521 } 522 523 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 524 { 525 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 526 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 527 PetscErrorCode ierr; 528 PetscInt l,*garray = mat->garray,diag; 529 530 PetscFunctionBegin; 531 /* code only works for square matrices A */ 532 533 /* find size of row to the left of the diagonal part */ 534 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 535 row = row - diag; 536 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 537 if (garray[b->j[b->i[row]+l]] > diag) break; 538 } 539 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 540 541 /* diagonal part */ 542 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 543 544 /* right of diagonal part */ 545 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 546 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 547 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU; 548 #endif 549 PetscFunctionReturn(0); 550 } 551 552 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 553 { 554 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 555 PetscScalar value = 0.0; 556 PetscErrorCode ierr; 557 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 558 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 559 PetscBool roworiented = aij->roworiented; 560 561 /* Some Variables required in the macro */ 562 Mat A = aij->A; 563 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 564 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 565 MatScalar *aa = a->a; 566 PetscBool ignorezeroentries = a->ignorezeroentries; 567 Mat B = aij->B; 568 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 569 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 570 MatScalar *ba = b->a; 571 /* This variable 
below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 572 * cannot use "#if defined" inside a macro. */ 573 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 574 575 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 576 PetscInt nonew; 577 MatScalar *ap1,*ap2; 578 579 PetscFunctionBegin; 580 for (i=0; i<m; i++) { 581 if (im[i] < 0) continue; 582 #if defined(PETSC_USE_DEBUG) 583 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 584 #endif 585 if (im[i] >= rstart && im[i] < rend) { 586 row = im[i] - rstart; 587 lastcol1 = -1; 588 rp1 = aj + ai[row]; 589 ap1 = aa + ai[row]; 590 rmax1 = aimax[row]; 591 nrow1 = ailen[row]; 592 low1 = 0; 593 high1 = nrow1; 594 lastcol2 = -1; 595 rp2 = bj + bi[row]; 596 ap2 = ba + bi[row]; 597 rmax2 = bimax[row]; 598 nrow2 = bilen[row]; 599 low2 = 0; 600 high2 = nrow2; 601 602 for (j=0; j<n; j++) { 603 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 604 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 605 if (in[j] >= cstart && in[j] < cend) { 606 col = in[j] - cstart; 607 nonew = a->nonew; 608 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 609 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 610 if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU; 611 #endif 612 } else if (in[j] < 0) continue; 613 #if defined(PETSC_USE_DEBUG) 614 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 615 #endif 616 else { 617 if (mat->was_assembled) { 618 if (!aij->colmap) { 619 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 620 } 621 #if defined(PETSC_USE_CTABLE) 622 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 623 col--; 624 #else 625 col = aij->colmap[in[j]] - 1; 626 #endif 627 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 628 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 629 col = in[j]; 630 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 631 B = aij->B; 632 b = (Mat_SeqAIJ*)B->data; 633 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 634 rp2 = bj + bi[row]; 635 ap2 = ba + bi[row]; 636 rmax2 = bimax[row]; 637 nrow2 = bilen[row]; 638 low2 = 0; 639 high2 = nrow2; 640 bm = aij->B->rmap->n; 641 ba = b->a; 642 inserted = PETSC_FALSE; 643 } else if (col < 0) { 644 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 645 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 646 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 647 } 648 } else col = in[j]; 649 nonew = b->nonew; 650 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 651 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 652 if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU; 653 #endif 654 } 655 } 656 } else { 657 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 658 if (!aij->donotstash) { 659 mat->assembled = PETSC_FALSE; 660 if (roworiented) { 661 ierr = 
MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 662 } else { 663 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 664 } 665 } 666 } 667 } 668 PetscFunctionReturn(0); 669 } 670 671 /* 672 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 673 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 674 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 675 */ 676 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 677 { 678 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 679 Mat A = aij->A; /* diagonal part of the matrix */ 680 Mat B = aij->B; /* offdiagonal part of the matrix */ 681 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 682 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 683 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 684 PetscInt *ailen = a->ilen,*aj = a->j; 685 PetscInt *bilen = b->ilen,*bj = b->j; 686 PetscInt am = aij->A->rmap->n,j; 687 PetscInt diag_so_far = 0,dnz; 688 PetscInt offd_so_far = 0,onz; 689 690 PetscFunctionBegin; 691 /* Iterate over all rows of the matrix */ 692 for (j=0; j<am; j++) { 693 dnz = onz = 0; 694 /* Iterate over all non-zero columns of the current row */ 695 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 696 /* If column is in the diagonal */ 697 if (mat_j[col] >= cstart && mat_j[col] < cend) { 698 aj[diag_so_far++] = mat_j[col] - cstart; 699 dnz++; 700 } else { /* off-diagonal entries */ 701 bj[offd_so_far++] = mat_j[col]; 702 onz++; 703 } 704 } 705 ailen[j] = dnz; 706 bilen[j] = onz; 707 } 708 PetscFunctionReturn(0); 709 } 710 711 /* 712 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 713 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 714 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 715 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 716 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 717 */ 718 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 719 { 720 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 721 Mat A = aij->A; /* diagonal part of the matrix */ 722 Mat B = aij->B; /* offdiagonal part of the matrix */ 723 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 724 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 725 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 726 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 727 PetscInt *ailen = a->ilen,*aj = a->j; 728 PetscInt *bilen = b->ilen,*bj = b->j; 729 PetscInt am = aij->A->rmap->n,j; 730 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 731 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 732 PetscScalar *aa = a->a,*ba = b->a; 733 734 PetscFunctionBegin; 735 /* Iterate over all rows of the matrix */ 736 for (j=0; j<am; j++) { 737 dnz_row = onz_row = 0; 738 rowstart_offd = full_offd_i[j]; 739 rowstart_diag = full_diag_i[j]; 740 /* Iterate over all non-zero columns of the current row */ 741 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 742 /* If column is in the diagonal */ 743 if (mat_j[col] >= cstart && mat_j[col] < cend) { 744 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 745 aa[rowstart_diag+dnz_row] = mat_a[col]; 746 dnz_row++; 747 } else { /* off-diagonal entries */ 748 bj[rowstart_offd+onz_row] = mat_j[col]; 749 ba[rowstart_offd+onz_row] = mat_a[col]; 750 onz_row++; 751 } 752 } 753 ailen[j] = dnz_row; 754 bilen[j] = onz_row; 755 } 756 PetscFunctionReturn(0); 757 } 758 759 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 762 PetscErrorCode ierr; 763 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 764 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 765 766 PetscFunctionBegin; 767 for (i=0; i<m; i++) { 768 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 769 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 770 if (idxm[i] >= rstart && idxm[i] < rend) { 771 row = idxm[i] - rstart; 772 for (j=0; j<n; j++) { 773 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 774 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 775 if (idxn[j] >= cstart && idxn[j] < cend) { 776 col = idxn[j] - cstart; 777 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 778 } else { 779 if (!aij->colmap) { 780 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 781 } 782 #if defined(PETSC_USE_CTABLE) 783 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 784 col--; 785 #else 786 col = aij->colmap[idxn[j]] - 1; 787 #endif 788 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 789 else { 790 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 791 } 792 } 793 } 794 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 795 } 796 PetscFunctionReturn(0); 797 } 798 799 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 800 801 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 PetscErrorCode ierr; 805 PetscInt nstash,reallocs; 806 807 PetscFunctionBegin; 808 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 809 810 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 811 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 812 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 813 PetscFunctionReturn(0); 814 } 815 816 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 817 { 818 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 819 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 820 PetscErrorCode ierr; 821 PetscMPIInt n; 822 PetscInt i,j,rstart,ncols,flg; 823 PetscInt *row,*col; 824 
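  /*
     The stash-draining loop below receives values that other processes set for rows owned here
     (MatAssemblyBegin_MPIAIJ() started the communication) and applies them with MatSetValues_MPIAIJ().
     A minimal caller-side sketch of the sequence this supports, using only the public API and
     hypothetical index/value arrays:

         ierr = MatSetValues(A,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);   any process may set any row
         ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
         ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);                off-process values arrive here
  */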
PetscBool other_disassembled; 825 PetscScalar *val; 826 827 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 828 829 PetscFunctionBegin; 830 if (!aij->donotstash && !mat->nooffprocentries) { 831 while (1) { 832 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 833 if (!flg) break; 834 835 for (i=0; i<n; ) { 836 /* Now identify the consecutive vals belonging to the same row */ 837 for (j=i,rstart=row[j]; j<n; j++) { 838 if (row[j] != rstart) break; 839 } 840 if (j < n) ncols = j-i; 841 else ncols = n-i; 842 /* Now assemble all these values with a single function call */ 843 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 844 i = j; 845 } 846 } 847 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 848 } 849 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 850 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 851 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 852 if (mat->boundtocpu) { 853 ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr); 854 ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr); 855 } 856 #endif 857 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 858 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 859 860 /* determine if any processor has disassembled, if so we must 861 also disassemble ourself, in order that we may reassemble. */ 862 /* 863 if nonzero structure of submatrix B cannot change then we know that 864 no processor disassembled thus we can skip this stuff 865 */ 866 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 867 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 868 if (mat->was_assembled && !other_disassembled) { 869 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 870 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 871 #endif 872 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 873 } 874 } 875 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 876 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 877 } 878 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 879 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 880 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 881 #endif 882 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 883 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 884 885 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 886 887 aij->rowvalues = 0; 888 889 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 890 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 891 892 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 893 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 894 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 895 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 896 } 897 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 898 mat->offloadmask = PETSC_OFFLOAD_BOTH; 899 #endif 900 PetscFunctionReturn(0); 901 } 902 903 PetscErrorCode 
MatZeroEntries_MPIAIJ(Mat A) 904 { 905 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 906 PetscErrorCode ierr; 907 908 PetscFunctionBegin; 909 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 910 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 911 PetscFunctionReturn(0); 912 } 913 914 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 915 { 916 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 917 PetscObjectState sA, sB; 918 PetscInt *lrows; 919 PetscInt r, len; 920 PetscBool cong, lch, gch; 921 PetscErrorCode ierr; 922 923 PetscFunctionBegin; 924 /* get locally owned rows */ 925 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 926 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 927 /* fix right hand side if needed */ 928 if (x && b) { 929 const PetscScalar *xx; 930 PetscScalar *bb; 931 932 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 933 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 934 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 935 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 936 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 937 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 938 } 939 940 sA = mat->A->nonzerostate; 941 sB = mat->B->nonzerostate; 942 943 if (diag != 0.0 && cong) { 944 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 945 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 946 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 947 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 948 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 949 PetscInt nnwA, nnwB; 950 PetscBool nnzA, nnzB; 951 952 nnwA = aijA->nonew; 953 nnwB = aijB->nonew; 954 nnzA = aijA->keepnonzeropattern; 955 nnzB = aijB->keepnonzeropattern; 956 if (!nnzA) { 957 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 958 aijA->nonew = 0; 959 } 960 if (!nnzB) { 961 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 962 aijB->nonew = 0; 963 } 964 /* Must zero here before the next loop */ 965 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 966 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 967 for (r = 0; r < len; ++r) { 968 const PetscInt row = lrows[r] + A->rmap->rstart; 969 if (row >= A->cmap->N) continue; 970 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 971 } 972 aijA->nonew = nnwA; 973 aijB->nonew = nnwB; 974 } else { 975 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 976 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 977 } 978 ierr = PetscFree(lrows);CHKERRQ(ierr); 979 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 980 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 981 982 /* reduce nonzerostate */ 983 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 984 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 985 if (gch) A->nonzerostate++; 986 PetscFunctionReturn(0); 987 } 988 989 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 990 { 991 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 992 
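  /*
     Overview of the algorithm below: the requested global rows are mapped to their owning processes
     with a PetscSF and reduced so that each process learns which of its local rows must be zeroed.
     The diagonal block is handled by MatZeroRowsColumns() on l->A; for the off-diagonal block a 0/1
     mask of the zeroed rows is scattered into the ghost (l->lvec) layout and the matching entries of
     l->B are zeroed explicitly, subtracting the removed column contributions from b when x and b are
     provided.

     A minimal caller-side sketch through the public interface (hypothetical global row indices,
     e.g. for Dirichlet boundary conditions):

         PetscInt bcrows[] = {0, 7, 12};
         ierr = MatZeroRowsColumns(A,3,bcrows,1.0,x,b);CHKERRQ(ierr);
  */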
PetscErrorCode ierr; 993 PetscMPIInt n = A->rmap->n; 994 PetscInt i,j,r,m,len = 0; 995 PetscInt *lrows,*owners = A->rmap->range; 996 PetscMPIInt p = 0; 997 PetscSFNode *rrows; 998 PetscSF sf; 999 const PetscScalar *xx; 1000 PetscScalar *bb,*mask; 1001 Vec xmask,lmask; 1002 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 1003 const PetscInt *aj, *ii,*ridx; 1004 PetscScalar *aa; 1005 1006 PetscFunctionBegin; 1007 /* Create SF where leaves are input rows and roots are owned rows */ 1008 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 1009 for (r = 0; r < n; ++r) lrows[r] = -1; 1010 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 1011 for (r = 0; r < N; ++r) { 1012 const PetscInt idx = rows[r]; 1013 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 1014 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1015 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 1016 } 1017 rrows[r].rank = p; 1018 rrows[r].index = rows[r] - owners[p]; 1019 } 1020 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1021 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1022 /* Collect flags for rows to be zeroed */ 1023 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1024 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1025 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1026 /* Compress and put in row numbers */ 1027 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1028 /* zero diagonal part of matrix */ 1029 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1030 /* handle off diagonal part of matrix */ 1031 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1032 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1033 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1034 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1035 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1036 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1038 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1039 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1040 PetscBool cong; 1041 1042 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1043 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1044 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1045 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1046 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1047 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1048 } 1049 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1050 /* remove zeroed rows of off diagonal matrix */ 1051 ii = aij->i; 1052 for (i=0; i<len; i++) { 1053 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1054 } 1055 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1056 if (aij->compressedrow.use) { 1057 m = aij->compressedrow.nrows; 1058 ii = aij->compressedrow.i; 1059 ridx = aij->compressedrow.rindex; 1060 for (i=0; i<m; i++) { 1061 n = ii[i+1] - ii[i]; 1062 aj = aij->j + ii[i]; 1063 aa = aij->a + ii[i]; 1064 1065 for (j=0; j<n; j++) { 1066 if (PetscAbsScalar(mask[*aj])) { 1067 if (b) bb[*ridx] -= 
*aa*xx[*aj]; 1068 *aa = 0.0; 1069 } 1070 aa++; 1071 aj++; 1072 } 1073 ridx++; 1074 } 1075 } else { /* do not use compressed row format */ 1076 m = l->B->rmap->n; 1077 for (i=0; i<m; i++) { 1078 n = ii[i+1] - ii[i]; 1079 aj = aij->j + ii[i]; 1080 aa = aij->a + ii[i]; 1081 for (j=0; j<n; j++) { 1082 if (PetscAbsScalar(mask[*aj])) { 1083 if (b) bb[i] -= *aa*xx[*aj]; 1084 *aa = 0.0; 1085 } 1086 aa++; 1087 aj++; 1088 } 1089 } 1090 } 1091 if (x && b) { 1092 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1093 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1094 } 1095 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1096 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1097 ierr = PetscFree(lrows);CHKERRQ(ierr); 1098 1099 /* only change matrix nonzero state if pattern was allowed to be changed */ 1100 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1101 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1102 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1103 } 1104 PetscFunctionReturn(0); 1105 } 1106 1107 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1108 { 1109 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1110 PetscErrorCode ierr; 1111 PetscInt nt; 1112 VecScatter Mvctx = a->Mvctx; 1113 1114 PetscFunctionBegin; 1115 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1116 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1117 1118 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1120 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1121 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1128 PetscErrorCode ierr; 1129 1130 PetscFunctionBegin; 1131 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1132 PetscFunctionReturn(0); 1133 } 1134 1135 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1136 { 1137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1138 PetscErrorCode ierr; 1139 VecScatter Mvctx = a->Mvctx; 1140 1141 PetscFunctionBegin; 1142 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1143 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1144 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1145 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1146 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1151 { 1152 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1153 PetscErrorCode ierr; 1154 1155 PetscFunctionBegin; 1156 /* do nondiagonal part */ 1157 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1158 /* do local part */ 1159 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1160 /* add partial results together */ 1161 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1162 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1167 { 1168 MPI_Comm comm; 1169 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1170 Mat Adia = 
Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1171 IS Me,Notme; 1172 PetscErrorCode ierr; 1173 PetscInt M,N,first,last,*notme,i; 1174 PetscBool lf; 1175 PetscMPIInt size; 1176 1177 PetscFunctionBegin; 1178 /* Easy test: symmetric diagonal block */ 1179 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1180 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1181 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1182 if (!*f) PetscFunctionReturn(0); 1183 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1184 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1185 if (size == 1) PetscFunctionReturn(0); 1186 1187 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1188 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1189 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1190 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1191 for (i=0; i<first; i++) notme[i] = i; 1192 for (i=last; i<M; i++) notme[i-last+first] = i; 1193 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1194 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1195 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1196 Aoff = Aoffs[0]; 1197 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1198 Boff = Boffs[0]; 1199 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1200 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1201 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1202 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1203 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1204 ierr = PetscFree(notme);CHKERRQ(ierr); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1209 { 1210 PetscErrorCode ierr; 1211 1212 PetscFunctionBegin; 1213 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1218 { 1219 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1220 PetscErrorCode ierr; 1221 1222 PetscFunctionBegin; 1223 /* do nondiagonal part */ 1224 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1225 /* do local part */ 1226 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1227 /* add partial results together */ 1228 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1229 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 /* 1234 This only works correctly for square matrices where the subblock A->A is the 1235 diagonal block 1236 */ 1237 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1238 { 1239 PetscErrorCode ierr; 1240 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1241 1242 PetscFunctionBegin; 1243 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1244 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1245 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1250 { 1251 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1252 PetscErrorCode ierr; 1253 1254 PetscFunctionBegin; 1255 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1256 ierr = 
MatScale(a->B,aa);CHKERRQ(ierr); 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1261 { 1262 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1263 PetscErrorCode ierr; 1264 1265 PetscFunctionBegin; 1266 #if defined(PETSC_USE_LOG) 1267 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1268 #endif 1269 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1270 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1271 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1272 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1273 #if defined(PETSC_USE_CTABLE) 1274 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1275 #else 1276 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1277 #endif 1278 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1279 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1280 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1281 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1282 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1283 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1284 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1285 1286 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1287 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1288 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1289 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1290 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1291 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1292 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1293 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1294 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1295 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1296 #if defined(PETSC_HAVE_ELEMENTAL) 1297 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1298 #endif 1299 #if defined(PETSC_HAVE_HYPRE) 1300 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1301 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1302 #endif 1303 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1304 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr); 1305 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1310 { 1311 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1312 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1313 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1314 const PetscInt *garray = aij->garray; 1315 PetscInt header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb; 1316 PetscInt *rowlens; 1317 PetscInt *colidxs; 1318 PetscScalar *matvals; 1319 PetscErrorCode ierr; 1320 1321 PetscFunctionBegin; 1322 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 1323 1324 M = mat->rmap->N; 1325 N = mat->cmap->N; 1326 m = mat->rmap->n; 1327 rs 
= mat->rmap->rstart; 1328 cs = mat->cmap->rstart; 1329 nz = A->nz + B->nz; 1330 1331 /* write matrix header */ 1332 header[0] = MAT_FILE_CLASSID; 1333 header[1] = M; header[2] = N; header[3] = nz; 1334 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1335 ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr); 1336 1337 /* fill in and store row lengths */ 1338 ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr); 1339 for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1340 ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr); 1341 ierr = PetscFree(rowlens);CHKERRQ(ierr); 1342 1343 /* fill in and store column indices */ 1344 ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr); 1345 for (cnt=0, i=0; i<m; i++) { 1346 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1347 if (garray[B->j[jb]] > cs) break; 1348 colidxs[cnt++] = garray[B->j[jb]]; 1349 } 1350 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1351 colidxs[cnt++] = A->j[ja] + cs; 1352 for (; jb<B->i[i+1]; jb++) 1353 colidxs[cnt++] = garray[B->j[jb]]; 1354 } 1355 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1356 ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 1357 ierr = PetscFree(colidxs);CHKERRQ(ierr); 1358 1359 /* fill in and store nonzero values */ 1360 ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr); 1361 for (cnt=0, i=0; i<m; i++) { 1362 for (jb=B->i[i]; jb<B->i[i+1]; jb++) { 1363 if (garray[B->j[jb]] > cs) break; 1364 matvals[cnt++] = B->a[jb]; 1365 } 1366 for (ja=A->i[i]; ja<A->i[i+1]; ja++) 1367 matvals[cnt++] = A->a[ja]; 1368 for (; jb<B->i[i+1]; jb++) 1369 matvals[cnt++] = B->a[jb]; 1370 } 1371 if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz); 1372 ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 1373 ierr = PetscFree(matvals);CHKERRQ(ierr); 1374 1375 /* write block size option to the viewer's .info file */ 1376 ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 #include <petscdraw.h> 1381 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1382 { 1383 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1384 PetscErrorCode ierr; 1385 PetscMPIInt rank = aij->rank,size = aij->size; 1386 PetscBool isdraw,iascii,isbinary; 1387 PetscViewer sviewer; 1388 PetscViewerFormat format; 1389 1390 PetscFunctionBegin; 1391 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1392 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1393 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1394 if (iascii) { 1395 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1396 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1397 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1398 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1399 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1400 for (i=0; i<(PetscInt)size; i++) { 1401 nmax = PetscMax(nmax,nz[i]); 1402 nmin = PetscMin(nmin,nz[i]); 1403 navg += nz[i]; 1404 } 1405 ierr = PetscFree(nz);CHKERRQ(ierr); 1406 navg = navg/size; 1407 ierr = 
PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1408 PetscFunctionReturn(0); 1409 } 1410 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1411 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1412 MatInfo info; 1413 PetscBool inodes; 1414 1415 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1416 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1417 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1418 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1419 if (!inodes) { 1420 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1421 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1422 } else { 1423 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1424 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1425 } 1426 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1427 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1428 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1429 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1430 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1431 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1433 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1436 PetscInt inodecount,inodelimit,*inodes; 1437 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1438 if (inodes) { 1439 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1440 } else { 1441 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1442 } 1443 PetscFunctionReturn(0); 1444 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1445 PetscFunctionReturn(0); 1446 } 1447 } else if (isbinary) { 1448 if (size == 1) { 1449 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1450 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1451 } else { 1452 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1453 } 1454 PetscFunctionReturn(0); 1455 } else if (iascii && size == 1) { 1456 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1457 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1458 PetscFunctionReturn(0); 1459 } else if (isdraw) { 1460 PetscDraw draw; 1461 PetscBool isnull; 1462 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1463 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1464 if (isnull) PetscFunctionReturn(0); 1465 } 1466 1467 { /* assemble the entire matrix onto first processor */ 1468 Mat A = NULL, Av; 1469 IS isrow,iscol; 1470 1471 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1472 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1473 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1474 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1475 /* The commented code uses MatCreateSubMatrices instead */ 1476 /* 1477 Mat *AA, A = NULL, Av; 1478 IS isrow,iscol; 1479 1480 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1481 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1482 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1483 if (!rank) { 1484 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1485 A = AA[0]; 1486 Av = AA[0]; 1487 } 1488 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1489 */ 1490 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1491 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1492 /* 1493 Everyone has to call to draw the matrix since the graphics waits are 1494 synchronized across all processors that share the PetscDraw object 1495 */ 1496 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1497 if (!rank) { 1498 if (((PetscObject)mat)->name) { 1499 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1500 } 1501 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1502 } 1503 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1504 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1505 ierr = MatDestroy(&A);CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } 1509 1510 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1511 { 1512 PetscErrorCode ierr; 1513 PetscBool iascii,isdraw,issocket,isbinary; 1514 1515 PetscFunctionBegin; 1516 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1517 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1518 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1519 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1520 if (iascii || isdraw || isbinary || issocket) { 1521 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1522 } 1523 PetscFunctionReturn(0); 1524 } 1525 1526 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1527 { 1528 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1529 PetscErrorCode ierr; 1530 Vec bb1 = 0; 1531 PetscBool hasop; 1532 1533 PetscFunctionBegin; 1534 if (flag == SOR_APPLY_UPPER) { 1535 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1536 PetscFunctionReturn(0); 1537 } 1538 1539 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1540 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1541 } 1542 1543 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1544 if (flag & SOR_ZERO_INITIAL_GUESS) { 1545 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1546 its--; 1547 } 1548 1549 while (its--) { 1550 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1551 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1552 1553 /* update rhs: bb1 = bb - B*x */ 1554 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1555 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1556 1557 
/* local sweep */ 1558 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1559 } 1560 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1561 if (flag & SOR_ZERO_INITIAL_GUESS) { 1562 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1563 its--; 1564 } 1565 while (its--) { 1566 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1568 1569 /* update rhs: bb1 = bb - B*x */ 1570 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1571 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1572 1573 /* local sweep */ 1574 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1575 } 1576 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1577 if (flag & SOR_ZERO_INITIAL_GUESS) { 1578 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1579 its--; 1580 } 1581 while (its--) { 1582 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1583 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1584 1585 /* update rhs: bb1 = bb - B*x */ 1586 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1587 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1588 1589 /* local sweep */ 1590 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1591 } 1592 } else if (flag & SOR_EISENSTAT) { 1593 Vec xx1; 1594 1595 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1596 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1597 1598 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1599 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1600 if (!mat->diag) { 1601 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1602 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1603 } 1604 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1605 if (hasop) { 1606 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1607 } else { 1608 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1609 } 1610 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1611 1612 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1613 1614 /* local sweep */ 1615 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1616 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1617 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1618 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1619 1620 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1621 1622 matin->factorerrortype = mat->A->factorerrortype; 1623 PetscFunctionReturn(0); 1624 } 1625 1626 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1627 { 1628 Mat aA,aB,Aperm; 1629 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1630 PetscScalar *aa,*ba; 1631 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1632 PetscSF rowsf,sf; 1633 IS parcolp = NULL; 1634 PetscBool done; 1635 PetscErrorCode ierr; 1636 1637 PetscFunctionBegin; 1638 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1639 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1640 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1641 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1642 1643 /* Invert row permutation to find out where my rows should go */ 1644 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1645 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1646 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1647 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1648 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1649 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1650 1651 /* Invert column permutation to find out where my columns should go */ 1652 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1653 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1654 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1655 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1656 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1657 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1658 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1659 1660 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1661 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1662 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1663 1664 /* Find out where my gcols should go */ 1665 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1666 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1667 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1668 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1669 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1670 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1671 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1672 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1673 1674 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1675 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1676 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1677 for (i=0; i<m; i++) { 1678 PetscInt row = rdest[i]; 1679 PetscMPIInt rowner; 1680 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1681 for (j=ai[i]; j<ai[i+1]; j++) { 1682 PetscInt col = cdest[aj[j]]; 1683 PetscMPIInt cowner; 1684 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1685 if (rowner == cowner) dnnz[i]++; 1686 else onnz[i]++; 1687 } 1688 for (j=bi[i]; j<bi[i+1]; j++) { 1689 PetscInt col = gcdest[bj[j]]; 1690 PetscMPIInt cowner; 1691 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1692 if (rowner == cowner) dnnz[i]++; 1693 else onnz[i]++; 1694 } 1695 } 1696 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1697 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1698 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1699 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1700 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1701 1702 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1703 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1704 ierr = 
MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1705 for (i=0; i<m; i++) { 1706 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1707 PetscInt j0,rowlen; 1708 rowlen = ai[i+1] - ai[i]; 1709 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1710 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1711 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1712 } 1713 rowlen = bi[i+1] - bi[i]; 1714 for (j0=j=0; j<rowlen; j0=j) { 1715 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1716 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1717 } 1718 } 1719 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1720 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1721 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1722 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1723 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1724 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1725 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1726 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1727 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1728 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1729 *B = Aperm; 1730 PetscFunctionReturn(0); 1731 } 1732 1733 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1734 { 1735 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1736 PetscErrorCode ierr; 1737 1738 PetscFunctionBegin; 1739 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1740 if (ghosts) *ghosts = aij->garray; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1745 { 1746 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1747 Mat A = mat->A,B = mat->B; 1748 PetscErrorCode ierr; 1749 PetscLogDouble isend[5],irecv[5]; 1750 1751 PetscFunctionBegin; 1752 info->block_size = 1.0; 1753 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1754 1755 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1756 isend[3] = info->memory; isend[4] = info->mallocs; 1757 1758 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1759 1760 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1761 isend[3] += info->memory; isend[4] += info->mallocs; 1762 if (flag == MAT_LOCAL) { 1763 info->nz_used = isend[0]; 1764 info->nz_allocated = isend[1]; 1765 info->nz_unneeded = isend[2]; 1766 info->memory = isend[3]; 1767 info->mallocs = isend[4]; 1768 } else if (flag == MAT_GLOBAL_MAX) { 1769 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1770 1771 info->nz_used = irecv[0]; 1772 info->nz_allocated = irecv[1]; 1773 info->nz_unneeded = irecv[2]; 1774 info->memory = irecv[3]; 1775 info->mallocs = irecv[4]; 1776 } else if (flag == MAT_GLOBAL_SUM) { 1777 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1778 1779 info->nz_used = irecv[0]; 1780 info->nz_allocated = irecv[1]; 1781 info->nz_unneeded = irecv[2]; 1782 info->memory = irecv[3]; 1783 info->mallocs = irecv[4]; 1784 } 1785 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1786 info->fill_ratio_needed = 0; 1787 info->factor_mallocs = 0; 1788 PetscFunctionReturn(0); 1789 } 
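/*
   Illustrative usage sketch (not part of the implementation): MatGetInfo_MPIAIJ() above fills a
   MatInfo struct by summing the diagonal (A) and off-diagonal (B) blocks and, for MAT_GLOBAL_SUM
   or MAT_GLOBAL_MAX, reducing those sums across the matrix communicator. Assuming an assembled
   MATMPIAIJ matrix named mat, a caller could inspect the global nonzero count with

       MatInfo info;
       ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g, mallocs during assembly %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);

   The MatInfo fields are PetscLogDouble (double), hence the %g format.
*/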
1790 1791 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1792 { 1793 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1794 PetscErrorCode ierr; 1795 1796 PetscFunctionBegin; 1797 switch (op) { 1798 case MAT_NEW_NONZERO_LOCATIONS: 1799 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1800 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1801 case MAT_KEEP_NONZERO_PATTERN: 1802 case MAT_NEW_NONZERO_LOCATION_ERR: 1803 case MAT_USE_INODES: 1804 case MAT_IGNORE_ZERO_ENTRIES: 1805 MatCheckPreallocated(A,1); 1806 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1807 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1808 break; 1809 case MAT_ROW_ORIENTED: 1810 MatCheckPreallocated(A,1); 1811 a->roworiented = flg; 1812 1813 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1814 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1815 break; 1816 case MAT_NEW_DIAGONALS: 1817 case MAT_SORTED_FULL: 1818 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1819 break; 1820 case MAT_IGNORE_OFF_PROC_ENTRIES: 1821 a->donotstash = flg; 1822 break; 1823 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1824 case MAT_SPD: 1825 case MAT_SYMMETRIC: 1826 case MAT_STRUCTURALLY_SYMMETRIC: 1827 case MAT_HERMITIAN: 1828 case MAT_SYMMETRY_ETERNAL: 1829 break; 1830 case MAT_SUBMAT_SINGLEIS: 1831 A->submat_singleis = flg; 1832 break; 1833 case MAT_STRUCTURE_ONLY: 1834 /* The option is handled directly by MatSetOption() */ 1835 break; 1836 default: 1837 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1838 } 1839 PetscFunctionReturn(0); 1840 } 1841 1842 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1843 { 1844 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1845 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1846 PetscErrorCode ierr; 1847 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1848 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1849 PetscInt *cmap,*idx_p; 1850 1851 PetscFunctionBegin; 1852 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1853 mat->getrowactive = PETSC_TRUE; 1854 1855 if (!mat->rowvalues && (idx || v)) { 1856 /* 1857 allocate enough space to hold information from the longest row. 
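       A returned row merges entries from the diagonal (A) and off-diagonal (B) blocks, so the
       buffers are sized by the longest combined row among the locally owned rows.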
1858 */ 1859 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1860 PetscInt max = 1,tmp; 1861 for (i=0; i<matin->rmap->n; i++) { 1862 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1863 if (max < tmp) max = tmp; 1864 } 1865 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1866 } 1867 1868 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1869 lrow = row - rstart; 1870 1871 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1872 if (!v) {pvA = 0; pvB = 0;} 1873 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1874 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1875 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1876 nztot = nzA + nzB; 1877 1878 cmap = mat->garray; 1879 if (v || idx) { 1880 if (nztot) { 1881 /* Sort by increasing column numbers, assuming A and B already sorted */ 1882 PetscInt imark = -1; 1883 if (v) { 1884 *v = v_p = mat->rowvalues; 1885 for (i=0; i<nzB; i++) { 1886 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1887 else break; 1888 } 1889 imark = i; 1890 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1891 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1892 } 1893 if (idx) { 1894 *idx = idx_p = mat->rowindices; 1895 if (imark > -1) { 1896 for (i=0; i<imark; i++) { 1897 idx_p[i] = cmap[cworkB[i]]; 1898 } 1899 } else { 1900 for (i=0; i<nzB; i++) { 1901 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1902 else break; 1903 } 1904 imark = i; 1905 } 1906 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1907 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 if (idx) *idx = 0; 1911 if (v) *v = 0; 1912 } 1913 } 1914 *nz = nztot; 1915 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1916 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1917 PetscFunctionReturn(0); 1918 } 1919 1920 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1921 { 1922 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1923 1924 PetscFunctionBegin; 1925 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1926 aij->getrowactive = PETSC_FALSE; 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1934 PetscErrorCode ierr; 1935 PetscInt i,j,cstart = mat->cmap->rstart; 1936 PetscReal sum = 0.0; 1937 MatScalar *v; 1938 1939 PetscFunctionBegin; 1940 if (aij->size == 1) { 1941 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1942 } else { 1943 if (type == NORM_FROBENIUS) { 1944 v = amat->a; 1945 for (i=0; i<amat->nz; i++) { 1946 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1947 } 1948 v = bmat->a; 1949 for (i=0; i<bmat->nz; i++) { 1950 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1951 } 1952 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1953 *norm = PetscSqrtReal(*norm); 1954 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1955 } else if (type == NORM_1) { /* max column norm */ 1956 PetscReal *tmp,*tmp2; 1957 PetscInt *jj,*garray = aij->garray; 1958 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1959 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1960 *norm = 0.0; 
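      /* NORM_1 is the maximum column sum: accumulate |a_ij| into tmp[] indexed by global column
         (diagonal block shifted by cstart, off-diagonal columns mapped through garray), sum the
         partial column sums across processes, then take the largest entry */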
1961 v = amat->a; jj = amat->j; 1962 for (j=0; j<amat->nz; j++) { 1963 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1964 } 1965 v = bmat->a; jj = bmat->j; 1966 for (j=0; j<bmat->nz; j++) { 1967 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1968 } 1969 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1970 for (j=0; j<mat->cmap->N; j++) { 1971 if (tmp2[j] > *norm) *norm = tmp2[j]; 1972 } 1973 ierr = PetscFree(tmp);CHKERRQ(ierr); 1974 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1975 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1976 } else if (type == NORM_INFINITY) { /* max row norm */ 1977 PetscReal ntemp = 0.0; 1978 for (j=0; j<aij->A->rmap->n; j++) { 1979 v = amat->a + amat->i[j]; 1980 sum = 0.0; 1981 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1982 sum += PetscAbsScalar(*v); v++; 1983 } 1984 v = bmat->a + bmat->i[j]; 1985 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1986 sum += PetscAbsScalar(*v); v++; 1987 } 1988 if (sum > ntemp) ntemp = sum; 1989 } 1990 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1991 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1992 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1993 } 1994 PetscFunctionReturn(0); 1995 } 1996 1997 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1998 { 1999 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2000 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2001 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2002 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2003 PetscErrorCode ierr; 2004 Mat B,A_diag,*B_diag; 2005 const MatScalar *array; 2006 2007 PetscFunctionBegin; 2008 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2009 ai = Aloc->i; aj = Aloc->j; 2010 bi = Bloc->i; bj = Bloc->j; 2011 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2012 PetscInt *d_nnz,*g_nnz,*o_nnz; 2013 PetscSFNode *oloc; 2014 PETSC_UNUSED PetscSF sf; 2015 2016 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2017 /* compute d_nnz for preallocation */ 2018 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2019 for (i=0; i<ai[ma]; i++) { 2020 d_nnz[aj[i]]++; 2021 } 2022 /* compute local off-diagonal contributions */ 2023 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2024 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2025 /* map those to global */ 2026 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2027 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2028 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2029 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2030 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2031 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2032 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2033 2034 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2035 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2036 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2037 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2038 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2039 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2040 } else { 2041 B = *matout; 2042 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2043 } 2044 2045 b = (Mat_MPIAIJ*)B->data; 2046 A_diag = a->A; 2047 B_diag = &b->A; 2048 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2049 A_diag_ncol = A_diag->cmap->N; 2050 B_diag_ilen = sub_B_diag->ilen; 2051 B_diag_i = sub_B_diag->i; 2052 2053 /* Set ilen for diagonal of B */ 2054 for (i=0; i<A_diag_ncol; i++) { 2055 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2056 } 2057 2058 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2059 very quickly (=without using MatSetValues), because all writes are local. */ 2060 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2061 2062 /* copy over the B part */ 2063 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2064 array = Bloc->a; 2065 row = A->rmap->rstart; 2066 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2067 cols_tmp = cols; 2068 for (i=0; i<mb; i++) { 2069 ncol = bi[i+1]-bi[i]; 2070 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2071 row++; 2072 array += ncol; cols_tmp += ncol; 2073 } 2074 ierr = PetscFree(cols);CHKERRQ(ierr); 2075 2076 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2077 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2078 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2079 *matout = B; 2080 } else { 2081 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2082 } 2083 PetscFunctionReturn(0); 2084 } 2085 2086 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2087 { 2088 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2089 Mat a = aij->A,b = aij->B; 2090 PetscErrorCode ierr; 2091 PetscInt s1,s2,s3; 2092 2093 PetscFunctionBegin; 2094 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2095 if (rr) { 2096 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2097 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2098 /* Overlap communication with computation. 
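       The forward scatter of rr into aij->lvec is started here and completed only after the
       diagonal block has been scaled below, so the communication overlaps with that local work.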
*/ 2099 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2100 } 2101 if (ll) { 2102 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2103 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2104 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2105 } 2106 /* scale the diagonal block */ 2107 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2108 2109 if (rr) { 2110 /* Do a scatter end and then right scale the off-diagonal block */ 2111 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2112 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2113 } 2114 PetscFunctionReturn(0); 2115 } 2116 2117 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2118 { 2119 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2120 PetscErrorCode ierr; 2121 2122 PetscFunctionBegin; 2123 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2128 { 2129 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2130 Mat a,b,c,d; 2131 PetscBool flg; 2132 PetscErrorCode ierr; 2133 2134 PetscFunctionBegin; 2135 a = matA->A; b = matA->B; 2136 c = matB->A; d = matB->B; 2137 2138 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2139 if (flg) { 2140 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2141 } 2142 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2143 PetscFunctionReturn(0); 2144 } 2145 2146 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2147 { 2148 PetscErrorCode ierr; 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2150 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2151 2152 PetscFunctionBegin; 2153 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2154 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2155 /* because of the column compression in the off-processor part of the matrix a->B, 2156 the number of columns in a->B and b->B may be different, hence we cannot call 2157 the MatCopy() directly on the two parts. If need be, we can provide a more 2158 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2159 then copying the submatrices */ 2160 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2161 } else { 2162 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2163 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2164 } 2165 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2170 { 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 /* 2179 Computes the number of nonzeros per row needed for preallocation when X and Y 2180 have different nonzero structure. 
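   Each row count is the size of the union of the (sorted) column lists of X and Y for that row,
   compared in global numbering through xltog/yltog. For example, if a row of X has global
   columns {0,3,7} and the same row of Y has {3,5}, the union {0,3,5,7} gives nnz = 4.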
2181 */ 2182 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2183 { 2184 PetscInt i,j,k,nzx,nzy; 2185 2186 PetscFunctionBegin; 2187 /* Set the number of nonzeros in the new matrix */ 2188 for (i=0; i<m; i++) { 2189 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2190 nzx = xi[i+1] - xi[i]; 2191 nzy = yi[i+1] - yi[i]; 2192 nnz[i] = 0; 2193 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2194 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2195 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2196 nnz[i]++; 2197 } 2198 for (; k<nzy; k++) nnz[i]++; 2199 } 2200 PetscFunctionReturn(0); 2201 } 2202 2203 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2204 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2205 { 2206 PetscErrorCode ierr; 2207 PetscInt m = Y->rmap->N; 2208 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2209 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2210 2211 PetscFunctionBegin; 2212 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2213 PetscFunctionReturn(0); 2214 } 2215 2216 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2217 { 2218 PetscErrorCode ierr; 2219 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2220 PetscBLASInt bnz,one=1; 2221 Mat_SeqAIJ *x,*y; 2222 2223 PetscFunctionBegin; 2224 if (str == SAME_NONZERO_PATTERN) { 2225 PetscScalar alpha = a; 2226 x = (Mat_SeqAIJ*)xx->A->data; 2227 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2228 y = (Mat_SeqAIJ*)yy->A->data; 2229 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2230 x = (Mat_SeqAIJ*)xx->B->data; 2231 y = (Mat_SeqAIJ*)yy->B->data; 2232 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2233 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2234 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2235 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2236 will be updated */ 2237 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2238 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2239 Y->offloadmask = PETSC_OFFLOAD_CPU; 2240 } 2241 #endif 2242 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2243 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2244 } else { 2245 Mat B; 2246 PetscInt *nnz_d,*nnz_o; 2247 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2248 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2249 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2250 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2251 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2252 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2253 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2254 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2255 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2256 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2257 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2258 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2259 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
2260 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2261 } 2262 PetscFunctionReturn(0); 2263 } 2264 2265 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2266 2267 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2268 { 2269 #if defined(PETSC_USE_COMPLEX) 2270 PetscErrorCode ierr; 2271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2272 2273 PetscFunctionBegin; 2274 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2275 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2276 #else 2277 PetscFunctionBegin; 2278 #endif 2279 PetscFunctionReturn(0); 2280 } 2281 2282 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2283 { 2284 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2285 PetscErrorCode ierr; 2286 2287 PetscFunctionBegin; 2288 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2289 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2290 PetscFunctionReturn(0); 2291 } 2292 2293 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2294 { 2295 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2296 PetscErrorCode ierr; 2297 2298 PetscFunctionBegin; 2299 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2300 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2301 PetscFunctionReturn(0); 2302 } 2303 2304 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2305 { 2306 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2307 PetscErrorCode ierr; 2308 PetscInt i,*idxb = 0; 2309 PetscScalar *va,*vb; 2310 Vec vtmp; 2311 2312 PetscFunctionBegin; 2313 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2314 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2315 if (idx) { 2316 for (i=0; i<A->rmap->n; i++) { 2317 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2318 } 2319 } 2320 2321 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2322 if (idx) { 2323 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2324 } 2325 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2326 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2327 2328 for (i=0; i<A->rmap->n; i++) { 2329 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2330 va[i] = vb[i]; 2331 if (idx) idx[i] = a->garray[idxb[i]]; 2332 } 2333 } 2334 2335 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2336 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2337 ierr = PetscFree(idxb);CHKERRQ(ierr); 2338 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2343 { 2344 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2345 PetscErrorCode ierr; 2346 PetscInt i,*idxb = 0; 2347 PetscScalar *va,*vb; 2348 Vec vtmp; 2349 2350 PetscFunctionBegin; 2351 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2352 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2353 if (idx) { 2354 for (i=0; i<A->cmap->n; i++) { 2355 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2356 } 2357 } 2358 2359 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2360 if (idx) { 2361 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2362 } 2363 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2364 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2365 2366 for (i=0; i<A->rmap->n; i++) { 2367 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2368 va[i] = vb[i]; 2369 if (idx) idx[i] = a->garray[idxb[i]]; 2370 } 2371 } 2372 2373 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2374 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2375 ierr = PetscFree(idxb);CHKERRQ(ierr); 2376 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2377 PetscFunctionReturn(0); 2378 } 2379 2380 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2381 { 2382 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) 
A->data; 2383 PetscInt n = A->rmap->n; 2384 PetscInt cstart = A->cmap->rstart; 2385 PetscInt *cmap = mat->garray; 2386 PetscInt *diagIdx, *offdiagIdx; 2387 Vec diagV, offdiagV; 2388 PetscScalar *a, *diagA, *offdiagA; 2389 PetscInt r; 2390 PetscErrorCode ierr; 2391 2392 PetscFunctionBegin; 2393 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2394 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2395 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2396 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2397 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2398 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2399 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2400 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2401 for (r = 0; r < n; ++r) { 2402 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2403 a[r] = diagA[r]; 2404 idx[r] = cstart + diagIdx[r]; 2405 } else { 2406 a[r] = offdiagA[r]; 2407 idx[r] = cmap[offdiagIdx[r]]; 2408 } 2409 } 2410 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2411 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2412 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2413 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2414 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2415 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2420 { 2421 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2422 PetscInt n = A->rmap->n; 2423 PetscInt cstart = A->cmap->rstart; 2424 PetscInt *cmap = mat->garray; 2425 PetscInt *diagIdx, *offdiagIdx; 2426 Vec diagV, offdiagV; 2427 PetscScalar *a, *diagA, *offdiagA; 2428 PetscInt r; 2429 PetscErrorCode ierr; 2430 2431 PetscFunctionBegin; 2432 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2433 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2434 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2435 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2436 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2437 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2438 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2439 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2440 for (r = 0; r < n; ++r) { 2441 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2442 a[r] = diagA[r]; 2443 idx[r] = cstart + diagIdx[r]; 2444 } else { 2445 a[r] = offdiagA[r]; 2446 idx[r] = cmap[offdiagIdx[r]]; 2447 } 2448 } 2449 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2450 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2451 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2452 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2453 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2454 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2455 PetscFunctionReturn(0); 2456 } 2457 2458 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2459 { 2460 PetscErrorCode ierr; 2461 Mat *dummy; 2462 2463 PetscFunctionBegin; 2464 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2465 *newmat = *dummy; 2466 ierr = PetscFree(dummy);CHKERRQ(ierr); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2471 { 2472 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2473 PetscErrorCode ierr; 2474 2475 PetscFunctionBegin; 2476 ierr = 
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2477 A->factorerrortype = a->A->factorerrortype; 2478 PetscFunctionReturn(0); 2479 } 2480 2481 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2482 { 2483 PetscErrorCode ierr; 2484 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2485 2486 PetscFunctionBegin; 2487 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2488 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2489 if (x->assembled) { 2490 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2491 } else { 2492 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2493 } 2494 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2495 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2500 { 2501 PetscFunctionBegin; 2502 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2503 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2504 PetscFunctionReturn(0); 2505 } 2506 2507 /*@ 2508 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2509 2510 Collective on Mat 2511 2512 Input Parameters: 2513 + A - the matrix 2514 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2515 2516 Level: advanced 2517 2518 @*/ 2519 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2520 { 2521 PetscErrorCode ierr; 2522 2523 PetscFunctionBegin; 2524 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2529 { 2530 PetscErrorCode ierr; 2531 PetscBool sc = PETSC_FALSE,flg; 2532 2533 PetscFunctionBegin; 2534 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2535 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2536 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2537 if (flg) { 2538 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2539 } 2540 ierr = PetscOptionsTail();CHKERRQ(ierr); 2541 PetscFunctionReturn(0); 2542 } 2543 2544 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2545 { 2546 PetscErrorCode ierr; 2547 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2548 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2549 2550 PetscFunctionBegin; 2551 if (!Y->preallocated) { 2552 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2553 } else if (!aij->nz) { 2554 PetscInt nonew = aij->nonew; 2555 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2556 aij->nonew = nonew; 2557 } 2558 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2563 { 2564 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2569 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2570 if (d) { 2571 PetscInt rstart; 
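    /* the sequential MatMissingDiagonal() on the diagonal block returns a local row index;
       shift it by the ownership range so the caller receives a global row index */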
2572 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2573 *d += rstart; 2574 2575 } 2576 PetscFunctionReturn(0); 2577 } 2578 2579 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2580 { 2581 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2582 PetscErrorCode ierr; 2583 2584 PetscFunctionBegin; 2585 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2586 PetscFunctionReturn(0); 2587 } 2588 2589 /* -------------------------------------------------------------------*/ 2590 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2591 MatGetRow_MPIAIJ, 2592 MatRestoreRow_MPIAIJ, 2593 MatMult_MPIAIJ, 2594 /* 4*/ MatMultAdd_MPIAIJ, 2595 MatMultTranspose_MPIAIJ, 2596 MatMultTransposeAdd_MPIAIJ, 2597 0, 2598 0, 2599 0, 2600 /*10*/ 0, 2601 0, 2602 0, 2603 MatSOR_MPIAIJ, 2604 MatTranspose_MPIAIJ, 2605 /*15*/ MatGetInfo_MPIAIJ, 2606 MatEqual_MPIAIJ, 2607 MatGetDiagonal_MPIAIJ, 2608 MatDiagonalScale_MPIAIJ, 2609 MatNorm_MPIAIJ, 2610 /*20*/ MatAssemblyBegin_MPIAIJ, 2611 MatAssemblyEnd_MPIAIJ, 2612 MatSetOption_MPIAIJ, 2613 MatZeroEntries_MPIAIJ, 2614 /*24*/ MatZeroRows_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 0, 2619 /*29*/ MatSetUp_MPIAIJ, 2620 0, 2621 0, 2622 MatGetDiagonalBlock_MPIAIJ, 2623 0, 2624 /*34*/ MatDuplicate_MPIAIJ, 2625 0, 2626 0, 2627 0, 2628 0, 2629 /*39*/ MatAXPY_MPIAIJ, 2630 MatCreateSubMatrices_MPIAIJ, 2631 MatIncreaseOverlap_MPIAIJ, 2632 MatGetValues_MPIAIJ, 2633 MatCopy_MPIAIJ, 2634 /*44*/ MatGetRowMax_MPIAIJ, 2635 MatScale_MPIAIJ, 2636 MatShift_MPIAIJ, 2637 MatDiagonalSet_MPIAIJ, 2638 MatZeroRowsColumns_MPIAIJ, 2639 /*49*/ MatSetRandom_MPIAIJ, 2640 0, 2641 0, 2642 0, 2643 0, 2644 /*54*/ MatFDColoringCreate_MPIXAIJ, 2645 0, 2646 MatSetUnfactored_MPIAIJ, 2647 MatPermute_MPIAIJ, 2648 0, 2649 /*59*/ MatCreateSubMatrix_MPIAIJ, 2650 MatDestroy_MPIAIJ, 2651 MatView_MPIAIJ, 2652 0, 2653 0, 2654 /*64*/ 0, 2655 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2660 MatGetRowMinAbs_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*75*/ MatFDColoringApply_AIJ, 2666 MatSetFromOptions_MPIAIJ, 2667 0, 2668 0, 2669 MatFindZeroDiagonals_MPIAIJ, 2670 /*80*/ 0, 2671 0, 2672 0, 2673 /*83*/ MatLoad_MPIAIJ, 2674 MatIsSymmetric_MPIAIJ, 2675 0, 2676 0, 2677 0, 2678 0, 2679 /*89*/ 0, 2680 0, 2681 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2682 0, 2683 0, 2684 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2685 0, 2686 0, 2687 0, 2688 MatBindToCPU_MPIAIJ, 2689 /*99*/ MatProductSetFromOptions_MPIAIJ, 2690 0, 2691 0, 2692 MatConjugate_MPIAIJ, 2693 0, 2694 /*104*/MatSetValuesRow_MPIAIJ, 2695 MatRealPart_MPIAIJ, 2696 MatImaginaryPart_MPIAIJ, 2697 0, 2698 0, 2699 /*109*/0, 2700 0, 2701 MatGetRowMin_MPIAIJ, 2702 0, 2703 MatMissingDiagonal_MPIAIJ, 2704 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2705 0, 2706 MatGetGhosts_MPIAIJ, 2707 0, 2708 0, 2709 /*119*/0, 2710 0, 2711 0, 2712 0, 2713 MatGetMultiProcBlock_MPIAIJ, 2714 /*124*/MatFindNonzeroRows_MPIAIJ, 2715 MatGetColumnNorms_MPIAIJ, 2716 MatInvertBlockDiagonal_MPIAIJ, 2717 MatInvertVariableBlockDiagonal_MPIAIJ, 2718 MatCreateSubMatricesMPI_MPIAIJ, 2719 /*129*/0, 2720 0, 2721 0, 2722 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2723 0, 2724 /*134*/0, 2725 0, 2726 0, 2727 0, 2728 0, 2729 /*139*/MatSetBlockSizes_MPIAIJ, 2730 0, 2731 0, 2732 MatFDColoringSetUp_MPIXAIJ, 2733 MatFindOffBlockDiagonalEntries_MPIAIJ, 2734 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2735 /*145*/0, 2736 0, 2737 0 2738 }; 2739 2740 /* 
----------------------------------------------------------------------------------------*/ 2741 2742 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2743 { 2744 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2745 PetscErrorCode ierr; 2746 2747 PetscFunctionBegin; 2748 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2749 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2750 PetscFunctionReturn(0); 2751 } 2752 2753 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2754 { 2755 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2756 PetscErrorCode ierr; 2757 2758 PetscFunctionBegin; 2759 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2760 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2761 PetscFunctionReturn(0); 2762 } 2763 2764 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2765 { 2766 Mat_MPIAIJ *b; 2767 PetscErrorCode ierr; 2768 PetscMPIInt size; 2769 2770 PetscFunctionBegin; 2771 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2772 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2773 b = (Mat_MPIAIJ*)B->data; 2774 2775 #if defined(PETSC_USE_CTABLE) 2776 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2777 #else 2778 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2779 #endif 2780 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2781 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2782 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2783 2784 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2785 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2786 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2787 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2788 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2789 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2790 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2791 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2792 2793 if (!B->preallocated) { 2794 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2795 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2796 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2797 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2798 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2799 } 2800 2801 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2802 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2803 B->preallocated = PETSC_TRUE; 2804 B->was_assembled = PETSC_FALSE; 2805 B->assembled = PETSC_FALSE; 2806 PetscFunctionReturn(0); 2807 } 2808 2809 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2810 { 2811 Mat_MPIAIJ *b; 2812 PetscErrorCode ierr; 2813 2814 PetscFunctionBegin; 2815 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2816 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2818 b = (Mat_MPIAIJ*)B->data; 2819 2820 #if defined(PETSC_USE_CTABLE) 2821 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2822 #else 2823 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2824 #endif 2825 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2826 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2827 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2828 2829 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2830 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2831 B->preallocated = PETSC_TRUE; 2832 B->was_assembled = PETSC_FALSE; 2833 B->assembled = PETSC_FALSE; 2834 PetscFunctionReturn(0); 2835 } 2836 2837 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2838 { 2839 Mat mat; 2840 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2841 PetscErrorCode ierr; 2842 2843 PetscFunctionBegin; 2844 *newmat = 0; 2845 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2846 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2847 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2848 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2849 a = (Mat_MPIAIJ*)mat->data; 2850 2851 mat->factortype = matin->factortype; 2852 mat->assembled = matin->assembled; 2853 mat->insertmode = NOT_SET_VALUES; 2854 mat->preallocated = matin->preallocated; 2855 2856 a->size = oldmat->size; 2857 a->rank = oldmat->rank; 2858 a->donotstash = oldmat->donotstash; 2859 a->roworiented = oldmat->roworiented; 2860 a->rowindices = NULL; 2861 a->rowvalues = NULL; 2862 a->getrowactive = PETSC_FALSE; 2863 2864 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2865 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2866 2867 if (oldmat->colmap) { 2868 #if defined(PETSC_USE_CTABLE) 2869 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2870 #else 2871 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2872 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2873 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2874 #endif 2875 } else a->colmap = NULL; 2876 if (oldmat->garray) { 2877 PetscInt len; 2878 len = oldmat->B->cmap->n; 2879 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2880 ierr 
= PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2881 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2882 } else a->garray = NULL; 2883 2884 /* It may happen MatDuplicate is called with a non-assembled matrix 2885 In fact, MatDuplicate only requires the matrix to be preallocated 2886 This may happen inside a DMCreateMatrix_Shell */ 2887 if (oldmat->lvec) { 2888 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2889 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2890 } 2891 if (oldmat->Mvctx) { 2892 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2893 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2894 } 2895 if (oldmat->Mvctx_mpi1) { 2896 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2897 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2898 } 2899 2900 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2901 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2902 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2903 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2904 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2905 *newmat = mat; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2910 { 2911 PetscBool isbinary, ishdf5; 2912 PetscErrorCode ierr; 2913 2914 PetscFunctionBegin; 2915 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2916 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2917 /* force binary viewer to load .info file if it has not yet done so */ 2918 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2919 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2920 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2921 if (isbinary) { 2922 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2923 } else if (ishdf5) { 2924 #if defined(PETSC_HAVE_HDF5) 2925 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2926 #else 2927 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2928 #endif 2929 } else { 2930 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2931 } 2932 PetscFunctionReturn(0); 2933 } 2934 2935 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 2936 { 2937 PetscInt header[4],M,N,m,nz,rows,cols,sum,i; 2938 PetscInt *rowidxs,*colidxs; 2939 PetscScalar *matvals; 2940 PetscErrorCode ierr; 2941 2942 PetscFunctionBegin; 2943 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2944 2945 /* read in matrix header */ 2946 ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2947 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file"); 2948 M = header[1]; N = header[2]; nz = header[3]; 2949 if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M); 2950 if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is 
negative",N); 2951 if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ"); 2952 2953 /* set block sizes from the viewer's .info file */ 2954 ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr); 2955 /* set global sizes if not set already */ 2956 if (mat->rmap->N < 0) mat->rmap->N = M; 2957 if (mat->cmap->N < 0) mat->cmap->N = N; 2958 ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr); 2959 ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr); 2960 2961 /* check if the matrix sizes are correct */ 2962 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 2963 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols); 2964 2965 /* read in row lengths and build row indices */ 2966 ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr); 2967 ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr); 2968 ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr); 2969 rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i]; 2970 ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr); 2971 if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum); 2972 /* read in column indices and matrix values */ 2973 ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr); 2974 ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr); 2975 ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr); 2976 /* store matrix indices and values */ 2977 ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr); 2978 ierr = PetscFree(rowidxs);CHKERRQ(ierr); 2979 ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr); 2980 PetscFunctionReturn(0); 2981 } 2982 2983 /* Not scalable because of ISAllGather() unless getting all columns. 
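   ISAllGather() concatenates every process's local piece of iscol onto each process, so each
   rank ends up storing the full column index set. The routine first checks whether iscol is a
   stride covering exactly the owned columns on every process; in that case an identity stride
   IS of length N is built locally and the gather is skipped.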
*/ 2984 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2985 { 2986 PetscErrorCode ierr; 2987 IS iscol_local; 2988 PetscBool isstride; 2989 PetscMPIInt lisstride=0,gisstride; 2990 2991 PetscFunctionBegin; 2992 /* check if we are grabbing all columns*/ 2993 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2994 2995 if (isstride) { 2996 PetscInt start,len,mstart,mlen; 2997 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2998 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2999 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3000 if (mstart == start && mlen-mstart == len) lisstride = 1; 3001 } 3002 3003 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3004 if (gisstride) { 3005 PetscInt N; 3006 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3007 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3008 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3009 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3010 } else { 3011 PetscInt cbs; 3012 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3013 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3014 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3015 } 3016 3017 *isseq = iscol_local; 3018 PetscFunctionReturn(0); 3019 } 3020 3021 /* 3022 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3023 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3024 3025 Input Parameters: 3026 mat - matrix 3027 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3028 i.e., mat->rstart <= isrow[i] < mat->rend 3029 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3030 i.e., mat->cstart <= iscol[i] < mat->cend 3031 Output Parameter: 3032 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3033 iscol_o - sequential column index set for retrieving mat->B 3034 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3035 */ 3036 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3037 { 3038 PetscErrorCode ierr; 3039 Vec x,cmap; 3040 const PetscInt *is_idx; 3041 PetscScalar *xarray,*cmaparray; 3042 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3043 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3044 Mat B=a->B; 3045 Vec lvec=a->lvec,lcmap; 3046 PetscInt i,cstart,cend,Bn=B->cmap->N; 3047 MPI_Comm comm; 3048 VecScatter Mvctx=a->Mvctx; 3049 3050 PetscFunctionBegin; 3051 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3052 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3053 3054 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3055 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3056 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3057 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3058 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3059 3060 /* Get start indices */ 3061 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3062 isstart -= ncols; 3063 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3064 3065 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3066 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3067 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3068 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3069 for (i=0; i<ncols; i++) { 3070 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3071 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3072 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3073 } 3074 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3075 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3076 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3077 3078 /* Get iscol_d */ 3079 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3080 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3081 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3082 3083 /* Get isrow_d */ 3084 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3085 rstart = mat->rmap->rstart; 3086 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3087 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3088 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3089 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3090 3091 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3092 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3093 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3094 3095 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3096 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3097 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3098 3099 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3100 3101 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3102 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3103 3104 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3105 /* off-process column indices */ 3106 count = 0; 3107 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3108 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3109 3110 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3111 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3112 for (i=0; i<Bn; i++) { 3113 if (PetscRealPart(xarray[i]) > -1.0) { 3114 idx[count] = i; /* local column index in off-diagonal part B */ 3115 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3116 count++; 3117 } 3118 } 3119 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3120 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3121 3122 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3123 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3124 3125 ierr = PetscFree(idx);CHKERRQ(ierr); 3126 *garray = cmap1; 3127 3128 ierr = VecDestroy(&x);CHKERRQ(ierr); 3129 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3130 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3131 PetscFunctionReturn(0); 3132 } 3133 3134 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3135 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3136 { 3137 PetscErrorCode ierr; 3138 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3139 Mat M = NULL; 3140 MPI_Comm comm; 3141 IS iscol_d,isrow_d,iscol_o; 3142 Mat Asub = NULL,Bsub = NULL; 3143 PetscInt n; 3144 3145 PetscFunctionBegin; 3146 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3147 3148 if (call == MAT_REUSE_MATRIX) { 3149 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3150 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3151 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3152 3153 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3154 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3155 3156 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3157 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3158 3159 /* Update diagonal and off-diagonal portions of submat */ 3160 asub = (Mat_MPIAIJ*)(*submat)->data; 3161 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3162 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3163 if (n) { 3164 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3165 } 3166 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3167 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3168 3169 } else { /* call == MAT_INITIAL_MATRIX) */ 3170 const PetscInt *garray; 3171 PetscInt BsubN; 3172 3173 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3174 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3175 3176 /* Create local submatrices Asub and Bsub */ 3177 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3178 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3179 3180 /* Create submatrix M */ 3181 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3182 3183 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3184 asub = (Mat_MPIAIJ*)M->data; 3185 3186 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3187 n = asub->B->cmap->N; 3188 if (BsubN > n) { 3189 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3190 const PetscInt *idx; 3191 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3192 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3193 3194 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3195 j = 0; 3196 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3197 for (i=0; i<n; i++) { 3198 if (j >= BsubN) break; 3199 while (subgarray[i] > garray[j]) j++; 3200 3201 if (subgarray[i] == garray[j]) { 3202 idx_new[i] = idx[j++]; 3203 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3204 } 3205 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3206 3207 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3208 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3209 3210 } else if (BsubN < n) { 3211 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3212 } 3213 3214 ierr = PetscFree(garray);CHKERRQ(ierr); 3215 *submat = M; 3216 3217 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3218 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3219 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3220 3221 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3222 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3223 3224 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3225 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3226 } 3227 PetscFunctionReturn(0); 3228 } 3229 3230 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3231 { 3232 PetscErrorCode ierr; 3233 IS iscol_local=NULL,isrow_d; 3234 PetscInt csize; 3235 PetscInt n,i,j,start,end; 3236 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3237 MPI_Comm comm; 3238 3239 PetscFunctionBegin; 3240 /* If isrow has same processor distribution as mat, 3241 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3242 if (call == MAT_REUSE_MATRIX) { 3243 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3244 if (isrow_d) { 3245 sameRowDist = PETSC_TRUE; 3246 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3247 } else { 3248 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3249 if (iscol_local) { 3250 sameRowDist = PETSC_TRUE; 3251 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3252 } 3253 } 3254 } else { 3255 /* Check if isrow has same processor distribution as mat */ 3256 sameDist[0] = 
PETSC_FALSE; 3257 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3258 if (!n) { 3259 sameDist[0] = PETSC_TRUE; 3260 } else { 3261 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3262 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3263 if (i >= start && j < end) { 3264 sameDist[0] = PETSC_TRUE; 3265 } 3266 } 3267 3268 /* Check if iscol has same processor distribution as mat */ 3269 sameDist[1] = PETSC_FALSE; 3270 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3271 if (!n) { 3272 sameDist[1] = PETSC_TRUE; 3273 } else { 3274 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3275 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3276 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3277 } 3278 3279 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3280 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3281 sameRowDist = tsameDist[0]; 3282 } 3283 3284 if (sameRowDist) { 3285 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3286 /* isrow and iscol have same processor distribution as mat */ 3287 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3288 PetscFunctionReturn(0); 3289 } else { /* sameRowDist */ 3290 /* isrow has same processor distribution as mat */ 3291 if (call == MAT_INITIAL_MATRIX) { 3292 PetscBool sorted; 3293 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3294 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3295 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3296 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3297 3298 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3299 if (sorted) { 3300 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3301 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3302 PetscFunctionReturn(0); 3303 } 3304 } else { /* call == MAT_REUSE_MATRIX */ 3305 IS iscol_sub; 3306 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3307 if (iscol_sub) { 3308 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3309 PetscFunctionReturn(0); 3310 } 3311 } 3312 } 3313 } 3314 3315 /* General case: iscol -> iscol_local which has global size of iscol */ 3316 if (call == MAT_REUSE_MATRIX) { 3317 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3318 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3319 } else { 3320 if (!iscol_local) { 3321 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3322 } 3323 } 3324 3325 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3326 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3327 3328 if (call == MAT_INITIAL_MATRIX) { 3329 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3330 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3331 } 3332 PetscFunctionReturn(0); 3333 } 3334 3335 /*@C 3336 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3337 and "off-diagonal" part of the matrix in CSR format. 3338 3339 Collective 3340 3341 Input Parameters: 3342 + comm - MPI communicator 3343 . 
A - "diagonal" portion of matrix 3344 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3345 - garray - global index of B columns 3346 3347 Output Parameter: 3348 . mat - the matrix, with input A as its local diagonal matrix 3349 Level: advanced 3350 3351 Notes: 3352 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3353 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3354 3355 .seealso: MatCreateMPIAIJWithSplitArrays() 3356 @*/ 3357 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3358 { 3359 PetscErrorCode ierr; 3360 Mat_MPIAIJ *maij; 3361 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3362 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3363 PetscScalar *oa=b->a; 3364 Mat Bnew; 3365 PetscInt m,n,N; 3366 3367 PetscFunctionBegin; 3368 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3369 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3370 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3371 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3372 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3373 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3374 3375 /* Get global columns of mat */ 3376 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3377 3378 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3379 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3380 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3381 maij = (Mat_MPIAIJ*)(*mat)->data; 3382 3383 (*mat)->preallocated = PETSC_TRUE; 3384 3385 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3386 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3387 3388 /* Set A as diagonal portion of *mat */ 3389 maij->A = A; 3390 3391 nz = oi[m]; 3392 for (i=0; i<nz; i++) { 3393 col = oj[i]; 3394 oj[i] = garray[col]; 3395 } 3396 3397 /* Set Bnew as off-diagonal portion of *mat */ 3398 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3399 bnew = (Mat_SeqAIJ*)Bnew->data; 3400 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3401 maij->B = Bnew; 3402 3403 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3404 3405 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3406 b->free_a = PETSC_FALSE; 3407 b->free_ij = PETSC_FALSE; 3408 ierr = MatDestroy(&B);CHKERRQ(ierr); 3409 3410 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3411 bnew->free_a = PETSC_TRUE; 3412 bnew->free_ij = PETSC_TRUE; 3413 3414 /* condense columns of maij->B */ 3415 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3416 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3417 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3418 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3419 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3420 PetscFunctionReturn(0); 3421 } 3422 3423 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3424 
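/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extracts a parallel submatrix when isrow has the
   same processor distribution as mat. Summary of the steps performed below:
     (1) the caller supplies iscol_local, the sequential gather of iscol (may be NULL on reuse);
     (2) build iscol_sub, the requested columns that touch this process's diagonal block or its
         off-diagonal block (via garray), and iscmap, the position of each kept column in the submatrix;
     (3) extract the sequential matrix Msub with MatCreateSubMatrices_MPIAIJ_SingleIS_Local();
     (4) for MAT_INITIAL_MATRIX, create and preallocate the parallel matrix from Msub's row lengths;
     (5) insert the rows of Msub into *newmat with MatSetValues_MPIAIJ().
   For MAT_REUSE_MATRIX, the objects "SubMatrix", "SubIScol" and "Subcmap" composed on *newmat by a
   previous MAT_INITIAL_MATRIX call are retrieved and refilled.
*/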
3425 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3426 { 3427 PetscErrorCode ierr; 3428 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3429 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3430 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3431 Mat M,Msub,B=a->B; 3432 MatScalar *aa; 3433 Mat_SeqAIJ *aij; 3434 PetscInt *garray = a->garray,*colsub,Ncols; 3435 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3436 IS iscol_sub,iscmap; 3437 const PetscInt *is_idx,*cmap; 3438 PetscBool allcolumns=PETSC_FALSE; 3439 MPI_Comm comm; 3440 3441 PetscFunctionBegin; 3442 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3443 3444 if (call == MAT_REUSE_MATRIX) { 3445 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3446 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3447 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3448 3449 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3450 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3451 3452 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3453 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3454 3455 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3456 3457 } else { /* call == MAT_INITIAL_MATRIX) */ 3458 PetscBool flg; 3459 3460 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3461 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3462 3463 /* (1) iscol -> nonscalable iscol_local */ 3464 /* Check for special case: each processor gets entire matrix columns */ 3465 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3466 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3467 if (allcolumns) { 3468 iscol_sub = iscol_local; 3469 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3470 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3471 3472 } else { 3473 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3474 PetscInt *idx,*cmap1,k; 3475 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3476 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3477 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3478 count = 0; 3479 k = 0; 3480 for (i=0; i<Ncols; i++) { 3481 j = is_idx[i]; 3482 if (j >= cstart && j < cend) { 3483 /* diagonal part of mat */ 3484 idx[count] = j; 3485 cmap1[count++] = i; /* column index in submat */ 3486 } else if (Bn) { 3487 /* off-diagonal part of mat */ 3488 if (j == garray[k]) { 3489 idx[count] = j; 3490 cmap1[count++] = i; /* column index in submat */ 3491 } else if (j > garray[k]) { 3492 while (j > garray[k] && k < Bn-1) k++; 3493 if (j == garray[k]) { 3494 idx[count] = j; 3495 cmap1[count++] = i; /* column index in submat */ 3496 } 3497 } 3498 } 3499 } 3500 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3501 3502 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3503 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3504 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3505 3506 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3507 } 3508 3509 /* (3) Create sequential Msub */ 3510 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3511 } 3512 3513 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3514 aij = (Mat_SeqAIJ*)(Msub)->data; 3515 ii = aij->i; 3516 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3517 3518 /* 3519 m - number of local rows 3520 Ncols - number of columns (same on all processors) 3521 rstart - first row in new global matrix generated 3522 */ 3523 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3524 3525 if (call == MAT_INITIAL_MATRIX) { 3526 /* (4) Create parallel newmat */ 3527 PetscMPIInt rank,size; 3528 PetscInt csize; 3529 3530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3532 3533 /* 3534 Determine the number of non-zeros in the diagonal and off-diagonal 3535 portions of the matrix in order to do correct preallocation 3536 */ 3537 3538 /* first get start and end of "diagonal" columns */ 3539 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3540 if (csize == PETSC_DECIDE) { 3541 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3542 if (mglobal == Ncols) { /* square matrix */ 3543 nlocal = m; 3544 } else { 3545 nlocal = Ncols/size + ((Ncols % size) > rank); 3546 } 3547 } else { 3548 nlocal = csize; 3549 } 3550 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3551 rstart = rend - nlocal; 3552 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3553 3554 /* next, compute all the lengths */ 3555 jj = aij->j; 3556 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3557 olens = dlens + m; 3558 for (i=0; i<m; i++) { 3559 jend = ii[i+1] - ii[i]; 3560 olen = 0; 3561 dlen = 0; 3562 for (j=0; j<jend; j++) { 3563 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3564 else dlen++; 3565 jj++; 3566 } 3567 olens[i] = olen; 3568 dlens[i] = dlen; 3569 } 3570 3571 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3572 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3573 3574 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3575 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
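    /* M has m local rows (the rows selected on this process) and nlocal local columns,
       where nlocal was computed above from csize or from an even split of Ncols */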
3576 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3577 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3578 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3579 ierr = PetscFree(dlens);CHKERRQ(ierr); 3580 3581 } else { /* call == MAT_REUSE_MATRIX */ 3582 M = *newmat; 3583 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3584 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3585 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3586 /* 3587 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3588 rather than the slower MatSetValues(). 3589 */ 3590 M->was_assembled = PETSC_TRUE; 3591 M->assembled = PETSC_FALSE; 3592 } 3593 3594 /* (5) Set values of Msub to *newmat */ 3595 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3596 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3597 3598 jj = aij->j; 3599 aa = aij->a; 3600 for (i=0; i<m; i++) { 3601 row = rstart + i; 3602 nz = ii[i+1] - ii[i]; 3603 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3604 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3605 jj += nz; aa += nz; 3606 } 3607 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3608 3609 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3610 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 3612 ierr = PetscFree(colsub);CHKERRQ(ierr); 3613 3614 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3615 if (call == MAT_INITIAL_MATRIX) { 3616 *newmat = M; 3617 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3618 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3619 3620 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3621 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3622 3623 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3624 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3625 3626 if (iscol_local) { 3627 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3628 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3629 } 3630 } 3631 PetscFunctionReturn(0); 3632 } 3633 3634 /* 3635 Not great since it makes two copies of the submatrix, first an SeqAIJ 3636 in local and then by concatenating the local matrices the end result. 3637 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3638 3639 Note: This requires a sequential iscol with all indices. 
3640 */ 3641 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3642 { 3643 PetscErrorCode ierr; 3644 PetscMPIInt rank,size; 3645 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3646 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3647 Mat M,Mreuse; 3648 MatScalar *aa,*vwork; 3649 MPI_Comm comm; 3650 Mat_SeqAIJ *aij; 3651 PetscBool colflag,allcolumns=PETSC_FALSE; 3652 3653 PetscFunctionBegin; 3654 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3655 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3656 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3657 3658 /* Check for special case: each processor gets entire matrix columns */ 3659 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3660 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3661 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3662 3663 if (call == MAT_REUSE_MATRIX) { 3664 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3665 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3666 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3667 } else { 3668 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3669 } 3670 3671 /* 3672 m - number of local rows 3673 n - number of columns (same on all processors) 3674 rstart - first row in new global matrix generated 3675 */ 3676 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3677 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3678 if (call == MAT_INITIAL_MATRIX) { 3679 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3680 ii = aij->i; 3681 jj = aij->j; 3682 3683 /* 3684 Determine the number of non-zeros in the diagonal and off-diagonal 3685 portions of the matrix in order to do correct preallocation 3686 */ 3687 3688 /* first get start and end of "diagonal" columns */ 3689 if (csize == PETSC_DECIDE) { 3690 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3691 if (mglobal == n) { /* square matrix */ 3692 nlocal = m; 3693 } else { 3694 nlocal = n/size + ((n % size) > rank); 3695 } 3696 } else { 3697 nlocal = csize; 3698 } 3699 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3700 rstart = rend - nlocal; 3701 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3702 3703 /* next, compute all the lengths */ 3704 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3705 olens = dlens + m; 3706 for (i=0; i<m; i++) { 3707 jend = ii[i+1] - ii[i]; 3708 olen = 0; 3709 dlen = 0; 3710 for (j=0; j<jend; j++) { 3711 if (*jj < rstart || *jj >= rend) olen++; 3712 else dlen++; 3713 jj++; 3714 } 3715 olens[i] = olen; 3716 dlens[i] = dlen; 3717 } 3718 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3719 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3720 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3721 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3722 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3723 ierr = PetscFree(dlens);CHKERRQ(ierr); 3724 } else { 3725 PetscInt ml,nl; 3726 3727 M = *newmat; 3728 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3729 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3730 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3731 /* 3732 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3733 rather than the slower MatSetValues(). 3734 */ 3735 M->was_assembled = PETSC_TRUE; 3736 M->assembled = PETSC_FALSE; 3737 } 3738 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3739 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3740 ii = aij->i; 3741 jj = aij->j; 3742 aa = aij->a; 3743 for (i=0; i<m; i++) { 3744 row = rstart + i; 3745 nz = ii[i+1] - ii[i]; 3746 cwork = jj; jj += nz; 3747 vwork = aa; aa += nz; 3748 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3749 } 3750 3751 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3752 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3753 *newmat = M; 3754 3755 /* save submatrix used in processor for next request */ 3756 if (call == MAT_INITIAL_MATRIX) { 3757 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3758 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3759 } 3760 PetscFunctionReturn(0); 3761 } 3762 3763 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3764 { 3765 PetscInt m,cstart, cend,j,nnz,i,d; 3766 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3767 const PetscInt *JJ; 3768 PetscErrorCode ierr; 3769 PetscBool nooffprocentries; 3770 3771 PetscFunctionBegin; 3772 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3773 3774 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3775 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3776 m = B->rmap->n; 3777 cstart = B->cmap->rstart; 3778 cend = B->cmap->rend; 3779 rstart = B->rmap->rstart; 3780 3781 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3782 3783 #if defined(PETSC_USE_DEBUG) 3784 for (i=0; i<m; i++) { 3785 nnz = Ii[i+1]- Ii[i]; 3786 JJ = J + Ii[i]; 3787 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3788 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3789 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3790 } 3791 #endif 3792 3793 for (i=0; i<m; i++) { 3794 nnz = Ii[i+1]- Ii[i]; 3795 JJ = J + Ii[i]; 3796 nnz_max = PetscMax(nnz_max,nnz); 3797 d = 0; 3798 for (j=0; j<nnz; j++) { 3799 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3800 } 3801 d_nnz[i] = d; 3802 o_nnz[i] = nnz - d; 3803 } 3804 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3805 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3806 3807 for (i=0; i<m; i++) { 3808 ii = i + rstart; 3809 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3810 } 3811 nooffprocentries = B->nooffprocentries; 3812 B->nooffprocentries = PETSC_TRUE; 3813 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3814 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3815 B->nooffprocentries = nooffprocentries; 3816 3817 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3818 PetscFunctionReturn(0); 3819 } 3820 3821 /*@ 3822 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3823 (the default parallel PETSc format). 
3824 3825 Collective 3826 3827 Input Parameters: 3828 + B - the matrix 3829 . i - the indices into j for the start of each local row (starts with zero) 3830 . j - the column indices for each local row (starts with zero) 3831 - v - optional values in the matrix 3832 3833 Level: developer 3834 3835 Notes: 3836 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3837 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3838 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3839 3840 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3841 3842 The format which is used for the sparse matrix input, is equivalent to a 3843 row-major ordering.. i.e for the following matrix, the input data expected is 3844 as shown 3845 3846 $ 1 0 0 3847 $ 2 0 3 P0 3848 $ ------- 3849 $ 4 5 6 P1 3850 $ 3851 $ Process0 [P0]: rows_owned=[0,1] 3852 $ i = {0,1,3} [size = nrow+1 = 2+1] 3853 $ j = {0,0,2} [size = 3] 3854 $ v = {1,2,3} [size = 3] 3855 $ 3856 $ Process1 [P1]: rows_owned=[2] 3857 $ i = {0,3} [size = nrow+1 = 1+1] 3858 $ j = {0,1,2} [size = 3] 3859 $ v = {4,5,6} [size = 3] 3860 3861 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3862 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3863 @*/ 3864 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3865 { 3866 PetscErrorCode ierr; 3867 3868 PetscFunctionBegin; 3869 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3870 PetscFunctionReturn(0); 3871 } 3872 3873 /*@C 3874 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3875 (the default parallel PETSc format). For good matrix assembly performance 3876 the user should preallocate the matrix storage by setting the parameters 3877 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3878 performance can be increased by more than a factor of 50. 3879 3880 Collective 3881 3882 Input Parameters: 3883 + B - the matrix 3884 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3885 (same value is used for all local rows) 3886 . d_nnz - array containing the number of nonzeros in the various rows of the 3887 DIAGONAL portion of the local submatrix (possibly different for each row) 3888 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3889 The size of this array is equal to the number of local rows, i.e 'm'. 3890 For matrices that will be factored, you must leave room for (and set) 3891 the diagonal entry even if it is zero. 3892 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3893 submatrix (same value is used for all local rows). 3894 - o_nnz - array containing the number of nonzeros in the various rows of the 3895 OFF-DIAGONAL portion of the local submatrix (possibly different for 3896 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3897 structure. The size of this array is equal to the number 3898 of local rows, i.e 'm'. 
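
   A typical call sequence is sketched below (a minimal outline; the sizes m,n,M,N and the
   d_nnz/o_nnz arrays are placeholders to be supplied by the caller):
.vb
     MatCreate(comm,&B);
     MatSetSizes(B,m,n,M,N);
     MatSetType(B,MATMPIAIJ);
     MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);
     ... insert entries with MatSetValues(), then MatAssemblyBegin()/MatAssemblyEnd() ...
.ve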
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage. The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor is the
   submatrix obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an m x n matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (m x N) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages containing the string
   malloc to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz and o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz and o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the
   local rows in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices corresponding to the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering,
i.e for the following matrix, the input data expected is 4045 as shown 4046 4047 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4048 4049 $ 1 0 0 4050 $ 2 0 3 P0 4051 $ ------- 4052 $ 4 5 6 P1 4053 $ 4054 $ Process0 [P0]: rows_owned=[0,1] 4055 $ i = {0,1,3} [size = nrow+1 = 2+1] 4056 $ j = {0,0,2} [size = 3] 4057 $ v = {1,2,3} [size = 3] 4058 $ 4059 $ Process1 [P1]: rows_owned=[2] 4060 $ i = {0,3} [size = nrow+1 = 1+1] 4061 $ j = {0,1,2} [size = 3] 4062 $ v = {4,5,6} [size = 3] 4063 4064 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4065 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4066 @*/ 4067 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4068 { 4069 PetscErrorCode ierr; 4070 4071 PetscFunctionBegin; 4072 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4073 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4074 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4075 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4076 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4077 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4078 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4079 PetscFunctionReturn(0); 4080 } 4081 4082 /*@ 4083 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4084 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4085 4086 Collective 4087 4088 Input Parameters: 4089 + mat - the matrix 4090 . m - number of local rows (Cannot be PETSC_DECIDE) 4091 . n - This value should be the same as the local size used in creating the 4092 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4093 calculated if N is given) For square matrices n is almost always m. 4094 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4095 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4096 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4097 . 
J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count the number of entries below the block diagonal in each row */
    ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) j++; /* test j < nnz before reading J[j] so we never read past the row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
       This value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax.
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4179 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4180 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4181 (same value is used for all local rows) 4182 . d_nnz - array containing the number of nonzeros in the various rows of the 4183 DIAGONAL portion of the local submatrix (possibly different for each row) 4184 or NULL, if d_nz is used to specify the nonzero structure. 4185 The size of this array is equal to the number of local rows, i.e 'm'. 4186 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4187 submatrix (same value is used for all local rows). 4188 - o_nnz - array containing the number of nonzeros in the various rows of the 4189 OFF-DIAGONAL portion of the local submatrix (possibly different for 4190 each row) or NULL, if o_nz is used to specify the nonzero 4191 structure. The size of this array is equal to the number 4192 of local rows, i.e 'm'. 4193 4194 Output Parameter: 4195 . A - the matrix 4196 4197 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4198 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4199 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4200 4201 Notes: 4202 If the *_nnz parameter is given then the *_nz parameter is ignored 4203 4204 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4205 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4206 storage requirements for this matrix. 4207 4208 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4209 processor than it must be used on all processors that share the object for 4210 that argument. 4211 4212 The user MUST specify either the local or global matrix dimensions 4213 (possibly both). 4214 4215 The parallel matrix is partitioned across processors such that the 4216 first m0 rows belong to process 0, the next m1 rows belong to 4217 process 1, the next m2 rows belong to process 2 etc.. where 4218 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4219 values corresponding to [m x N] submatrix. 4220 4221 The columns are logically partitioned with the n0 columns belonging 4222 to 0th partition, the next n1 columns belonging to the next 4223 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4224 4225 The DIAGONAL portion of the local submatrix on any given processor 4226 is the submatrix corresponding to the rows and columns m,n 4227 corresponding to the given processor. i.e diagonal matrix on 4228 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4229 etc. The remaining portion of the local submatrix [m x (N-n)] 4230 constitute the OFF-DIAGONAL portion. The example below better 4231 illustrates this concept. 4232 4233 For a square global matrix we define each processor's diagonal portion 4234 to be its local rows and the corresponding columns (a square submatrix); 4235 each processor's off-diagonal portion encompasses the remainder of the 4236 local matrix (a rectangular submatrix). 4237 4238 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4239 4240 When calling this routine with a single process communicator, a matrix of 4241 type SEQAIJ is returned. 
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
     MatCreate(...,&A);
     MatSetType(A,MATMPIAIJ);
     MatSetSizes(A, m,n,M,N);
     MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode  - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 nonzero values, that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices; e.g., proc1 stores [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz and o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz and o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e., 34, and
   hence the preallocation is perfect.
 4330 4331 Level: intermediate 4332 4333 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4334 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4335 @*/ 4336 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4337 { 4338 PetscErrorCode ierr; 4339 PetscMPIInt size; 4340 4341 PetscFunctionBegin; 4342 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4343 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4344 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4345 if (size > 1) { 4346 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4347 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4348 } else { 4349 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4350 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4351 } 4352 PetscFunctionReturn(0); 4353 } 4354 4355 /*@C 4356 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4357 4358 Not Collective 4359 4360 Input Parameter: 4361 . A - The MPIAIJ matrix 4362 4363 Output Parameters: 4364 + Ad - The local diagonal block as a SeqAIJ matrix 4365 . Ao - The local off-diagonal block as a SeqAIJ matrix 4366 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4367 4368 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4369 in Ad are in [0, Nc), where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is 4370 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4371 local column numbers to global column numbers in the original matrix.
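
   A minimal usage sketch (assuming A is an assembled MATMPIAIJ matrix; the variable names are
   only illustrative):
.vb
      Mat            Ad,Ao;
      const PetscInt *colmap;

      ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
      /* local column j of Ao corresponds to global column colmap[j] of the parallel matrix */
.ve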
4372 4373 Level: intermediate 4374 4375 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ 4376 @*/ 4377 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4378 { 4379 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4380 PetscBool flg; 4381 PetscErrorCode ierr; 4382 4383 PetscFunctionBegin; 4384 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4385 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4386 if (Ad) *Ad = a->A; 4387 if (Ao) *Ao = a->B; 4388 if (colmap) *colmap = a->garray; 4389 PetscFunctionReturn(0); 4390 } 4391 4392 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4393 { 4394 PetscErrorCode ierr; 4395 PetscInt m,N,i,rstart,nnz,Ii; 4396 PetscInt *indx; 4397 PetscScalar *values; 4398 4399 PetscFunctionBegin; 4400 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4401 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4402 PetscInt *dnz,*onz,sum,bs,cbs; 4403 4404 if (n == PETSC_DECIDE) { 4405 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4406 } 4407 /* Check sum(n) = N */ 4408 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4409 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4410 4411 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4412 rstart -= m; 4413 4414 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4415 for (i=0; i<m; i++) { 4416 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4417 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4418 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4419 } 4420 4421 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4422 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4423 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4424 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4425 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4426 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4427 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4428 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4429 } 4430 4431 /* numeric phase */ 4432 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4433 for (i=0; i<m; i++) { 4434 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4435 Ii = i + rstart; 4436 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4437 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4438 } 4439 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4440 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4441 PetscFunctionReturn(0); 4442 } 4443 4444 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4445 { 4446 PetscErrorCode ierr; 4447 PetscMPIInt rank; 4448 PetscInt m,N,i,rstart,nnz; 4449 size_t len; 4450 const PetscInt *indx; 4451 PetscViewer out; 4452 char *name; 4453 Mat B; 4454 const PetscScalar *values; 4455 4456 PetscFunctionBegin; 4457 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4458 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4459 /* Should this be the type of the diagonal block of A? 
*/ 4460 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4461 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4462 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4463 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4464 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4465 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4466 for (i=0; i<m; i++) { 4467 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4468 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4469 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4470 } 4471 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4472 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4473 4474 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4475 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4476 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4477 sprintf(name,"%s.%d",outfile,rank); 4478 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4479 ierr = PetscFree(name);CHKERRQ(ierr); 4480 ierr = MatView(B,out);CHKERRQ(ierr); 4481 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4482 ierr = MatDestroy(&B);CHKERRQ(ierr); 4483 PetscFunctionReturn(0); 4484 } 4485 4486 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4487 { 4488 PetscErrorCode ierr; 4489 Mat_Merge_SeqsToMPI *merge; 4490 PetscContainer container; 4491 4492 PetscFunctionBegin; 4493 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4494 if (container) { 4495 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4496 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4497 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4498 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4499 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4500 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4501 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4502 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4503 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4504 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4505 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4506 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4507 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4508 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4509 ierr = PetscFree(merge);CHKERRQ(ierr); 4510 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4511 } 4512 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4513 PetscFunctionReturn(0); 4514 } 4515 4516 #include <../src/mat/utils/freespace.h> 4517 #include <petscbt.h> 4518 4519 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4520 { 4521 PetscErrorCode ierr; 4522 MPI_Comm comm; 4523 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4524 PetscMPIInt size,rank,taga,*len_s; 4525 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4526 PetscInt proc,m; 4527 PetscInt **buf_ri,**buf_rj; 4528 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4529 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4530 MPI_Request *s_waits,*r_waits; 4531 MPI_Status *status; 4532 MatScalar *aa=a->a; 4533 MatScalar **abuf_r,*ba_i; 4534 Mat_Merge_SeqsToMPI *merge; 4535 PetscContainer container; 4536 4537 PetscFunctionBegin; 4538 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4539 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4540 4541 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4542 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4543 4544 ierr 
= PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4545 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4546 4547 bi = merge->bi; 4548 bj = merge->bj; 4549 buf_ri = merge->buf_ri; 4550 buf_rj = merge->buf_rj; 4551 4552 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4553 owners = merge->rowmap->range; 4554 len_s = merge->len_s; 4555 4556 /* send and recv matrix values */ 4557 /*-----------------------------*/ 4558 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4559 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4560 4561 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4562 for (proc=0,k=0; proc<size; proc++) { 4563 if (!len_s[proc]) continue; 4564 i = owners[proc]; 4565 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4566 k++; 4567 } 4568 4569 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4570 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4571 ierr = PetscFree(status);CHKERRQ(ierr); 4572 4573 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4574 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4575 4576 /* insert mat values of mpimat */ 4577 /*----------------------------*/ 4578 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4579 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4580 4581 for (k=0; k<merge->nrecv; k++) { 4582 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4583 nrows = *(buf_ri_k[k]); 4584 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4585 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4586 } 4587 4588 /* set values of ba */ 4589 m = merge->rowmap->n; 4590 for (i=0; i<m; i++) { 4591 arow = owners[rank] + i; 4592 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4593 bnzi = bi[i+1] - bi[i]; 4594 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4595 4596 /* add local non-zero vals of this proc's seqmat into ba */ 4597 anzi = ai[arow+1] - ai[arow]; 4598 aj = a->j + ai[arow]; 4599 aa = a->a + ai[arow]; 4600 nextaj = 0; 4601 for (j=0; nextaj<anzi; j++) { 4602 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4603 ba_i[j] += aa[nextaj++]; 4604 } 4605 } 4606 4607 /* add received vals into ba */ 4608 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4609 /* i-th row */ 4610 if (i == *nextrow[k]) { 4611 anzi = *(nextai[k]+1) - *nextai[k]; 4612 aj = buf_rj[k] + *(nextai[k]); 4613 aa = abuf_r[k] + *(nextai[k]); 4614 nextaj = 0; 4615 for (j=0; nextaj<anzi; j++) { 4616 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4617 ba_i[j] += aa[nextaj++]; 4618 } 4619 } 4620 nextrow[k]++; nextai[k]++; 4621 } 4622 } 4623 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4624 } 4625 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4626 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4627 4628 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4629 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4630 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4631 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4632 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4633 PetscFunctionReturn(0); 4634 } 4635 4636 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt 
m,PetscInt n,Mat *mpimat) 4637 { 4638 PetscErrorCode ierr; 4639 Mat B_mpi; 4640 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4641 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4642 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4643 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4644 PetscInt len,proc,*dnz,*onz,bs,cbs; 4645 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4646 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4647 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4648 MPI_Status *status; 4649 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4650 PetscBT lnkbt; 4651 Mat_Merge_SeqsToMPI *merge; 4652 PetscContainer container; 4653 4654 PetscFunctionBegin; 4655 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4656 4657 /* make sure it is a PETSc comm */ 4658 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4659 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4660 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4661 4662 ierr = PetscNew(&merge);CHKERRQ(ierr); 4663 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4664 4665 /* determine row ownership */ 4666 /*---------------------------------------------------------*/ 4667 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4668 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4669 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4670 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4671 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4672 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4673 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4674 4675 m = merge->rowmap->n; 4676 owners = merge->rowmap->range; 4677 4678 /* determine the number of messages to send, their lengths */ 4679 /*---------------------------------------------------------*/ 4680 len_s = merge->len_s; 4681 4682 len = 0; /* length of buf_si[] */ 4683 merge->nsend = 0; 4684 for (proc=0; proc<size; proc++) { 4685 len_si[proc] = 0; 4686 if (proc == rank) { 4687 len_s[proc] = 0; 4688 } else { 4689 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4690 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4691 } 4692 if (len_s[proc]) { 4693 merge->nsend++; 4694 nrows = 0; 4695 for (i=owners[proc]; i<owners[proc+1]; i++) { 4696 if (ai[i+1] > ai[i]) nrows++; 4697 } 4698 len_si[proc] = 2*(nrows+1); 4699 len += len_si[proc]; 4700 } 4701 } 4702 4703 /* determine the number and length of messages to receive for ij-structure */ 4704 /*-------------------------------------------------------------------------*/ 4705 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4706 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4707 4708 /* post the Irecv of j-structure */ 4709 /*-------------------------------*/ 4710 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4711 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4712 4713 /* post the Isend of j-structure */ 4714 /*--------------------------------*/ 4715 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4716 4717 for (proc=0, k=0; proc<size; proc++) { 4718 if (!len_s[proc]) continue; 4719 i = owners[proc]; 4720 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4721 k++; 4722 } 4723 4724 /* receives and sends 
of j-structure are complete */ 4725 /*------------------------------------------------*/ 4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4728 4729 /* send and recv i-structure */ 4730 /*---------------------------*/ 4731 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4732 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4733 4734 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4735 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4736 for (proc=0,k=0; proc<size; proc++) { 4737 if (!len_s[proc]) continue; 4738 /* form outgoing message for i-structure: 4739 buf_si[0]: nrows to be sent 4740 [1:nrows]: row index (global) 4741 [nrows+1:2*nrows+1]: i-structure index 4742 */ 4743 /*-------------------------------------------*/ 4744 nrows = len_si[proc]/2 - 1; 4745 buf_si_i = buf_si + nrows+1; 4746 buf_si[0] = nrows; 4747 buf_si_i[0] = 0; 4748 nrows = 0; 4749 for (i=owners[proc]; i<owners[proc+1]; i++) { 4750 anzi = ai[i+1] - ai[i]; 4751 if (anzi) { 4752 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4753 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4754 nrows++; 4755 } 4756 } 4757 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4758 k++; 4759 buf_si += len_si[proc]; 4760 } 4761 4762 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4763 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4764 4765 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4766 for (i=0; i<merge->nrecv; i++) { 4767 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4768 } 4769 4770 ierr = PetscFree(len_si);CHKERRQ(ierr); 4771 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4772 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4773 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4774 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4775 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4776 ierr = PetscFree(status);CHKERRQ(ierr); 4777 4778 /* compute a local seq matrix in each processor */ 4779 /*----------------------------------------------*/ 4780 /* allocate bi array and free space for accumulating nonzero column info */ 4781 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4782 bi[0] = 0; 4783 4784 /* create and initialize a linked list */ 4785 nlnk = N+1; 4786 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4787 4788 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4789 len = ai[owners[rank+1]] - ai[owners[rank]]; 4790 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4791 4792 current_space = free_space; 4793 4794 /* determine symbolic info for each local row */ 4795 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4796 4797 for (k=0; k<merge->nrecv; k++) { 4798 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4799 nrows = *buf_ri_k[k]; 4800 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4801 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4802 } 4803 4804 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4805 len = 0; 4806 for (i=0; i<m; i++) { 4807 bnzi = 0; 4808 /* add local 
non-zero cols of this proc's seqmat into lnk */ 4809 arow = owners[rank] + i; 4810 anzi = ai[arow+1] - ai[arow]; 4811 aj = a->j + ai[arow]; 4812 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4813 bnzi += nlnk; 4814 /* add received col data into lnk */ 4815 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4816 if (i == *nextrow[k]) { /* i-th row */ 4817 anzi = *(nextai[k]+1) - *nextai[k]; 4818 aj = buf_rj[k] + *nextai[k]; 4819 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4820 bnzi += nlnk; 4821 nextrow[k]++; nextai[k]++; 4822 } 4823 } 4824 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4825 4826 /* if free space is not available, make more free space */ 4827 if (current_space->local_remaining<bnzi) { 4828 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4829 nspacedouble++; 4830 } 4831 /* copy data into free space, then initialize lnk */ 4832 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4833 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4834 4835 current_space->array += bnzi; 4836 current_space->local_used += bnzi; 4837 current_space->local_remaining -= bnzi; 4838 4839 bi[i+1] = bi[i] + bnzi; 4840 } 4841 4842 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4843 4844 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4845 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4846 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4847 4848 /* create symbolic parallel matrix B_mpi */ 4849 /*---------------------------------------*/ 4850 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4851 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4852 if (n==PETSC_DECIDE) { 4853 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4854 } else { 4855 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4856 } 4857 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4858 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4859 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4860 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4861 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4862 4863 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4864 B_mpi->assembled = PETSC_FALSE; 4865 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4866 merge->bi = bi; 4867 merge->bj = bj; 4868 merge->buf_ri = buf_ri; 4869 merge->buf_rj = buf_rj; 4870 merge->coi = NULL; 4871 merge->coj = NULL; 4872 merge->owners_co = NULL; 4873 4874 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4875 4876 /* attach the supporting struct to B_mpi for reuse */ 4877 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4878 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4879 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4880 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4881 *mpimat = B_mpi; 4882 4883 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4884 PetscFunctionReturn(0); 4885 } 4886 4887 /*@C 4888 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4889 matrices from each processor 4890 4891 Collective 4892 4893 Input Parameters: 4894 + comm - the communicator the parallel matrix will live on 4895 . seqmat - the input sequential matrix 4896 . 
m - number of local rows (or PETSC_DECIDE) 4897 . n - number of local columns (or PETSC_DECIDE) 4898 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4899 4900 Output Parameter: 4901 . mpimat - the parallel matrix generated 4902 4903 Level: advanced 4904 4905 Notes: 4906 The dimensions of the sequential matrix on each processor MUST be the same. 4907 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4908 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4909 @*/ 4910 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4911 { 4912 PetscErrorCode ierr; 4913 PetscMPIInt size; 4914 4915 PetscFunctionBegin; 4916 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4917 if (size == 1) { 4918 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4919 if (scall == MAT_INITIAL_MATRIX) { 4920 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4921 } else { 4922 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4923 } 4924 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4925 PetscFunctionReturn(0); 4926 } 4927 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4928 if (scall == MAT_INITIAL_MATRIX) { 4929 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4930 } 4931 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4932 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4933 PetscFunctionReturn(0); 4934 } 4935 4936 /*@ 4937 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4938 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4939 with MatGetSize(). 4940 4941 Not Collective 4942 4943 Input Parameters: 4944 + A - the matrix 4945 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4946 4947 Output Parameter: 4948 . A_loc - the local sequential matrix generated 4949 4950 Level: developer 4951 4952 Notes: 4953 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 4954 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 4955 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 4956 modify the values of the returned A_loc.
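
   A usage sketch under these assumptions (A is an assembled parallel matrix; the names are illustrative):
.vb
      Mat A_loc;

      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
      /* ... use A_loc ...; after the values of A change (same nonzero pattern), refresh with */
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
      ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
.ve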
4957 4958 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4959 4960 @*/ 4961 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4962 { 4963 PetscErrorCode ierr; 4964 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4965 Mat_SeqAIJ *mat,*a,*b; 4966 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4967 MatScalar *aa,*ba,*cam; 4968 PetscScalar *ca; 4969 PetscMPIInt size; 4970 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4971 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4972 PetscBool match; 4973 4974 PetscFunctionBegin; 4975 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 4976 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4977 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 4978 if (size == 1) { 4979 if (scall == MAT_INITIAL_MATRIX) { 4980 ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr); 4981 *A_loc = mpimat->A; 4982 } else if (scall == MAT_REUSE_MATRIX) { 4983 ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4984 } 4985 PetscFunctionReturn(0); 4986 } 4987 4988 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4989 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4990 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4991 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4992 aa = a->a; ba = b->a; 4993 if (scall == MAT_INITIAL_MATRIX) { 4994 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4995 ci[0] = 0; 4996 for (i=0; i<am; i++) { 4997 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4998 } 4999 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5000 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5001 k = 0; 5002 for (i=0; i<am; i++) { 5003 ncols_o = bi[i+1] - bi[i]; 5004 ncols_d = ai[i+1] - ai[i]; 5005 /* off-diagonal portion of A */ 5006 for (jo=0; jo<ncols_o; jo++) { 5007 col = cmap[*bj]; 5008 if (col >= cstart) break; 5009 cj[k] = col; bj++; 5010 ca[k++] = *ba++; 5011 } 5012 /* diagonal portion of A */ 5013 for (j=0; j<ncols_d; j++) { 5014 cj[k] = cstart + *aj++; 5015 ca[k++] = *aa++; 5016 } 5017 /* off-diagonal portion of A */ 5018 for (j=jo; j<ncols_o; j++) { 5019 cj[k] = cmap[*bj++]; 5020 ca[k++] = *ba++; 5021 } 5022 } 5023 /* put together the new matrix */ 5024 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5025 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5026 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5027 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5028 mat->free_a = PETSC_TRUE; 5029 mat->free_ij = PETSC_TRUE; 5030 mat->nonew = 0; 5031 } else if (scall == MAT_REUSE_MATRIX) { 5032 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5033 ci = mat->i; cj = mat->j; cam = mat->a; 5034 for (i=0; i<am; i++) { 5035 /* off-diagonal portion of A */ 5036 ncols_o = bi[i+1] - bi[i]; 5037 for (jo=0; jo<ncols_o; jo++) { 5038 col = cmap[*bj]; 5039 if (col >= cstart) break; 5040 *cam++ = *ba++; bj++; 5041 } 5042 /* diagonal portion of A */ 5043 ncols_d = ai[i+1] - ai[i]; 5044 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5045 /* off-diagonal portion of A */ 5046 for (j=jo; j<ncols_o; j++) { 5047 *cam++ = *ba++; bj++; 5048 } 5049 } 5050 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5051 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5052 PetscFunctionReturn(0); 5053 } 5054 5055 /*@C 5056 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5057 5058 Not Collective 5059 5060 Input Parameters: 5061 + A - the matrix 5062 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5063 - row, col - index sets of rows and columns to extract (or NULL) 5064 5065 Output Parameter: 5066 . A_loc - the local sequential matrix generated 5067 5068 Level: developer 5069 5070 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5071 5072 @*/ 5073 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5074 { 5075 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5076 PetscErrorCode ierr; 5077 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5078 IS isrowa,iscola; 5079 Mat *aloc; 5080 PetscBool match; 5081 5082 PetscFunctionBegin; 5083 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5084 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5085 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5086 if (!row) { 5087 start = A->rmap->rstart; end = A->rmap->rend; 5088 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5089 } else { 5090 isrowa = *row; 5091 } 5092 if (!col) { 5093 start = A->cmap->rstart; 5094 cmap = a->garray; 5095 nzA = a->A->cmap->n; 5096 nzB = a->B->cmap->n; 5097 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5098 ncols = 0; 5099 for (i=0; i<nzB; i++) { 5100 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5101 else break; 5102 } 5103 imark = i; 5104 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5105 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5106 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5107 } else { 5108 iscola = *col; 5109 } 5110 if (scall != MAT_INITIAL_MATRIX) { 5111 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5112 aloc[0] = *A_loc; 5113 } 5114 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5115 if (!col) { /* attach global id of condensed columns */ 5116 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5117 } 5118 *A_loc = aloc[0]; 5119 ierr = PetscFree(aloc);CHKERRQ(ierr); 5120 if (!row) { 5121 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5122 } 5123 if (!col) { 5124 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5125 } 5126 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5127 PetscFunctionReturn(0); 5128 } 5129 5130 /* 5131 * Destroy a mat that may be 
composed with PetscSF communication objects. 5132 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5133 * */ 5134 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5135 { 5136 PetscSF sf,osf; 5137 IS map; 5138 PetscErrorCode ierr; 5139 5140 PetscFunctionBegin; 5141 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5142 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5143 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5144 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5145 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5146 ierr = ISDestroy(&map);CHKERRQ(ierr); 5147 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5148 PetscFunctionReturn(0); 5149 } 5150 5151 /* 5152 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5153 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5154 * on a global size. 5155 * */ 5156 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5157 { 5158 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5159 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5160 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5161 PetscMPIInt owner; 5162 PetscSFNode *iremote,*oiremote; 5163 const PetscInt *lrowindices; 5164 PetscErrorCode ierr; 5165 PetscSF sf,osf; 5166 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5167 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5168 MPI_Comm comm; 5169 ISLocalToGlobalMapping mapping; 5170 5171 PetscFunctionBegin; 5172 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5173 /* plocalsize is the number of roots 5174 * nrows is the number of leaves 5175 * */ 5176 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5177 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5178 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5179 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5180 for (i=0;i<nrows;i++) { 5181 /* Find a remote index and an owner for a row 5182 * The row could be local or remote 5183 * */ 5184 owner = 0; 5185 lidx = 0; 5186 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5187 iremote[i].index = lidx; 5188 iremote[i].rank = owner; 5189 } 5190 /* Create SF to communicate how many nonzero columns for each row */ 5191 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5192 /* SF will figure out the number of nonzero colunms for each row, and their 5193 * offsets 5194 * */ 5195 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5196 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5197 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5198 5199 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5200 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5201 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5202 roffsets[0] = 0; 5203 roffsets[1] = 0; 5204 for (i=0;i<plocalsize;i++) { 5205 /* diag */ 5206 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5207 /* off diag */ 5208 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5209 /* compute offsets so that we relative location for each row */ 5210 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5211 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5212 } 5213 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5214 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5215 /* 'r' 
means root, and 'l' means leaf */ 5216 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5217 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5218 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5219 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5220 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5221 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5222 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5223 dntotalcols = 0; 5224 ontotalcols = 0; 5225 ncol = 0; 5226 for (i=0;i<nrows;i++) { 5227 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5228 ncol = PetscMax(pnnz[i],ncol); 5229 /* diag */ 5230 dntotalcols += nlcols[i*2+0]; 5231 /* off diag */ 5232 ontotalcols += nlcols[i*2+1]; 5233 } 5234 /* We do not need to figure the right number of columns 5235 * since all the calculations will be done by going through the raw data 5236 * */ 5237 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5238 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5239 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5240 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5241 /* diag */ 5242 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5243 /* off diag */ 5244 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5245 /* diag */ 5246 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5247 /* off diag */ 5248 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5249 dntotalcols = 0; 5250 ontotalcols = 0; 5251 ntotalcols = 0; 5252 for (i=0;i<nrows;i++) { 5253 owner = 0; 5254 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5255 /* Set iremote for diag matrix */ 5256 for (j=0;j<nlcols[i*2+0];j++) { 5257 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5258 iremote[dntotalcols].rank = owner; 5259 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5260 ilocal[dntotalcols++] = ntotalcols++; 5261 } 5262 /* off diag */ 5263 for (j=0;j<nlcols[i*2+1];j++) { 5264 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5265 oiremote[ontotalcols].rank = owner; 5266 oilocal[ontotalcols++] = ntotalcols++; 5267 } 5268 } 5269 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5270 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5271 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5272 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5273 /* P serves as roots and P_oth is leaves 5274 * Diag matrix 5275 * */ 5276 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5277 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5278 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5279 5280 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5281 /* Off diag */ 5282 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5283 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5284 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5285 /* We operate on the matrix internal data for saving memory */ 5286 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5287 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5288 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5289 /* Convert to global indices for diag matrix */ 5290 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5291 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5292 /* We want P_oth store global indices */ 5293 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5294 /* Use memory scalable approach */ 5295 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5296 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5297 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5298 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5299 /* Convert back to local indices */ 5300 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5301 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5302 nout = 0; 5303 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5304 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5305 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5306 /* Exchange values */ 5307 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5308 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5309 /* Stop PETSc from shrinking memory */ 5310 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5311 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5312 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5313 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5314 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5315 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5316 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5317 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5318 PetscFunctionReturn(0); 5319 } 5320 5321 /* 5322 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5323 * This supports MPIAIJ and MAIJ 5324 * */ 5325 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5326 { 5327 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5328 Mat_SeqAIJ *p_oth; 5329 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5330 IS rows,map; 5331 PetscHMapI hamp; 5332 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5333 MPI_Comm comm; 5334 PetscSF sf,osf; 5335 PetscBool has; 5336 PetscErrorCode ierr; 5337 5338 PetscFunctionBegin; 5339 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5340 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5341 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5342 * and then create a submatrix (that often is an overlapping matrix) 5343 * */ 5344 if (reuse==MAT_INITIAL_MATRIX) { 5345 /* Use a hash table to figure out unique keys */ 5346 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5347 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5348 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5349 count = 0; 5350 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5351 for (i=0;i<a->B->cmap->n;i++) { 5352 key = a->garray[i]/dof; 5353 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5354 if (!has) { 5355 mapping[i] = count; 5356 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5357 } else { 5358 /* Current 'i' has the same value the previous step */ 5359 mapping[i] = count-1; 5360 } 5361 } 5362 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5363 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5364 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5365 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5366 off = 0; 5367 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5368 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5369 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5370 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5371 /* In case the matrix was already created but the user wants to recreate it */ 5372 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5373 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5374 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5375 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5376 } else if (reuse==MAT_REUSE_MATRIX) { 5377 /* If the matrix was already created, we simply update the values using the SF objects 5378 * that were attached to the matrix earlier. 5379 * */ 5380 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5381 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5382 if (!sf || !osf) { 5383 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5384 } 5385 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5386 /* Update values in place */ 5387 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5388 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5389 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5390 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5391 } else { 5392 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5393 } 5394 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5395 PetscFunctionReturn(0); 5396 } 5397 5398 /*@C 5399 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of the local part of A 5400 5401 Collective on Mat 5402 5403 Input Parameters: 5404 + A,B - the matrices in mpiaij format 5405 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5406 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5407 5408 Output Parameter: 5409 + rowb, colb - index sets of rows and columns of B to extract 5410 - B_seq - the sequential matrix generated 5411 5412 Level: developer 5413 5414 @*/ 5415 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5416 { 5417 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5418 PetscErrorCode ierr; 5419 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5420 IS isrowb,iscolb; 5421 Mat *bseq=NULL; 5422 5423 PetscFunctionBegin; 5424 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5425 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5426 } 5427 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5428 5429 if (scall == MAT_INITIAL_MATRIX) { 5430 start = A->cmap->rstart; 5431 cmap = a->garray; 5432 nzA = a->A->cmap->n; 5433 nzB = a->B->cmap->n; 5434 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5435 ncols = 0; 5436 for (i=0; i<nzB; i++) { /* row < local row index */ 5437 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5438 else break; 5439 } 5440 imark = i; 5441 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5442 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5443 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5444 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5445 } else { 5446 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5447 isrowb = *rowb; iscolb = *colb; 5448 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5449 bseq[0] = *B_seq; 5450 } 5451 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5452 *B_seq = bseq[0]; 5453 ierr = PetscFree(bseq);CHKERRQ(ierr); 5454 if (!rowb) { 5455 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5456 } else { 5457 *rowb = isrowb; 5458 } 5459 if (!colb) { 5460 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5461 } else { 5462 *colb = iscolb; 5463 } 5464 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5465 PetscFunctionReturn(0); 5466 } 5467 5468 /* 5469 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5470 of the OFF-DIAGONAL portion of local A 5471 5472 Collective on Mat 5473 5474 Input Parameters: 5475 + A,B - the matrices in mpiaij format 5476 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5477 5478 Output Parameter: 5479 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5480 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5481 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5482 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5483 5484 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5485 for this matrix. This is not desirable.. 
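
   A hedged calling sketch (names are illustrative; the caller keeps startsj_s, startsj_r and bufa
   between calls so the communication pattern can be reused):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa      = NULL;
      Mat        B_oth;

      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      /* ... later, after the values of B change but its nonzero structure does not ... */
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);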
5486 5487 Level: developer 5488 5489 */ 5490 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5491 { 5492 PetscErrorCode ierr; 5493 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5494 Mat_SeqAIJ *b_oth; 5495 VecScatter ctx; 5496 MPI_Comm comm; 5497 const PetscMPIInt *rprocs,*sprocs; 5498 const PetscInt *srow,*rstarts,*sstarts; 5499 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5500 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5501 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5502 MPI_Request *rwaits = NULL,*swaits = NULL; 5503 MPI_Status rstatus; 5504 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5505 5506 PetscFunctionBegin; 5507 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5508 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5509 5510 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5511 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5512 } 5513 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5514 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5515 5516 if (size == 1) { 5517 startsj_s = NULL; 5518 bufa_ptr = NULL; 5519 *B_oth = NULL; 5520 PetscFunctionReturn(0); 5521 } 5522 5523 ctx = a->Mvctx; 5524 tag = ((PetscObject)ctx)->tag; 5525 5526 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5527 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5528 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5529 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5530 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5531 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5532 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5533 5534 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5535 if (scall == MAT_INITIAL_MATRIX) { 5536 /* i-array */ 5537 /*---------*/ 5538 /* post receives */ 5539 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5540 for (i=0; i<nrecvs; i++) { 5541 rowlen = rvalues + rstarts[i]*rbs; 5542 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5543 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5544 } 5545 5546 /* pack the outgoing message */ 5547 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5548 5549 sstartsj[0] = 0; 5550 rstartsj[0] = 0; 5551 len = 0; /* total length of j or a array to be sent */ 5552 if (nsends) { 5553 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5554 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5555 } 5556 for (i=0; i<nsends; i++) { 5557 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5558 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5559 for (j=0; j<nrows; j++) { 5560 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5561 for (l=0; l<sbs; l++) { 5562 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5563 5564 rowlen[j*sbs+l] = ncols; 5565 5566 len += ncols; 5567 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5568 } 5569 k++; 5570 } 5571 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5572 5573 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5574 } 5575 /* recvs and sends of i-array are completed */ 5576 i = nrecvs; 5577 while (i--) { 5578 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5579 } 5580 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5581 ierr = PetscFree(svalues);CHKERRQ(ierr); 5582 5583 /* allocate buffers for sending j and a arrays */ 5584 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5585 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5586 5587 /* create i-array of B_oth */ 5588 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5589 5590 b_othi[0] = 0; 5591 len = 0; /* total length of j or a array to be received */ 5592 k = 0; 5593 for (i=0; i<nrecvs; i++) { 5594 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5595 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5596 for (j=0; j<nrows; j++) { 5597 b_othi[k+1] = b_othi[k] + rowlen[j]; 5598 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5599 k++; 5600 } 5601 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5602 } 5603 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5604 5605 /* allocate space for j and a arrrays of B_oth */ 5606 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5607 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5608 5609 /* j-array */ 5610 /*---------*/ 5611 /* post receives of j-array */ 5612 for (i=0; i<nrecvs; i++) { 5613 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5614 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5615 } 5616 5617 /* pack the outgoing message j-array */ 5618 if (nsends) k = sstarts[0]; 5619 for (i=0; i<nsends; i++) { 5620 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5621 bufJ = bufj+sstartsj[i]; 5622 for (j=0; j<nrows; j++) { 5623 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5624 for (ll=0; ll<sbs; ll++) { 5625 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5626 for (l=0; l<ncols; l++) { 5627 *bufJ++ = cols[l]; 5628 } 5629 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5630 } 5631 } 5632 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5633 } 5634 5635 /* recvs and sends of j-array are completed */ 5636 i = nrecvs; 5637 while (i--) { 5638 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5639 } 5640 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5641 } else if (scall == MAT_REUSE_MATRIX) { 5642 sstartsj = *startsj_s; 5643 rstartsj = *startsj_r; 5644 bufa = *bufa_ptr; 5645 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5646 b_otha = b_oth->a; 5647 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5648 5649 /* a-array */ 5650 /*---------*/ 5651 /* post receives of a-array */ 5652 for (i=0; i<nrecvs; i++) { 5653 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5654 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5655 } 5656 5657 /* pack the outgoing message a-array */ 5658 if (nsends) k = sstarts[0]; 5659 for (i=0; i<nsends; i++) { 5660 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5661 bufA = bufa+sstartsj[i]; 5662 for (j=0; j<nrows; j++) { 5663 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5664 for (ll=0; ll<sbs; ll++) { 5665 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5666 for (l=0; l<ncols; l++) { 5667 *bufA++ = vals[l]; 5668 } 5669 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5670 } 5671 } 5672 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5673 } 5674 /* recvs and sends of a-array are completed */ 5675 i = nrecvs; 5676 while (i--) { 5677 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5678 } 5679 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5680 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5681 5682 if (scall == MAT_INITIAL_MATRIX) { 5683 /* put together the new matrix */ 5684 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5685 5686 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5687 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5688 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5689 b_oth->free_a = PETSC_TRUE; 5690 b_oth->free_ij = PETSC_TRUE; 5691 b_oth->nonew = 0; 5692 5693 ierr = PetscFree(bufj);CHKERRQ(ierr); 5694 if (!startsj_s || !bufa_ptr) { 5695 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5696 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5697 } else { 5698 *startsj_s = sstartsj; 5699 *startsj_r = rstartsj; 5700 *bufa_ptr = bufa; 5701 } 5702 } 5703 5704 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5705 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5706 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5707 PetscFunctionReturn(0); 5708 } 5709 5710 /*@C 5711 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5712 5713 Not Collective 5714 5715 Input Parameters: 5716 . A - The matrix in mpiaij format 5717 5718 Output Parameter: 5719 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5720 . 
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (              )       (              )         (                  )
      m (      A       )  *  n (      B       )   =   m (        C         )
        (              )       (              )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode ierr;
  PetscInt       m=A->rmap->n,n=B->cmap->n;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
  ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(C,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}
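/*
   Editor's illustrative sketch (not part of the original source): the two routines above are the
   symbolic and numeric kernels that MatMatMult() typically dispatches to when the first operand is
   of type MATMPIDENSE and the second of type MATMPIAIJ. User code does not call them directly; a
   typical call site looks roughly like the following, where A, B, and C are placeholder names and
   both input matrices are assumed to be assembled with compatible parallel layouts.

      Mat A,B,C;                                       A: MATMPIDENSE (m x n), B: MATMPIAIJ (n x p)
      ierr = MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);CHKERRQ(ierr);    C: MATMPIDENSE (m x p)
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/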

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) {
    ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
  } else SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for MPIDense and MPIAIJ matrices",MatProductTypes[product->type]);
  PetscFunctionReturn(0);
}
/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
   MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix.

   MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
   In this case no space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
       and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0-based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

       An illustrative calling sketch is given in a comment at the end of this file.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt    i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool   roworiented = aij->roworiented;

    /* Some variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ  *a = (Mat_SeqAIJ*)A->data;
    PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar   *aa = a->a;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ  *b = (Mat_SeqAIJ*)B->data;
    PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar   *ba = b->a;
    /* The variable below is only used in the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   note that ba must be refreshed from the rebuilt B before ap2 is recomputed */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                ba    = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
            if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
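
/*
   Editor's illustrative sketch (not part of the original source) for MatCreateMPIAIJWithSplitArrays(),
   defined earlier in this file and referenced in its Notes. Each rank supplies its local "diagonal"
   block (i,j,a) and "off-diagonal" block (oi,oj,oa) in CSR form. All names below are placeholders,
   the example assumes a square matrix with matching row and column layouts, and the six arrays must
   remain valid until the matrix is destroyed, as required by the manual page above.

      Mat          A;
      PetscInt     m = ...;                  number of rows stored on this rank
      PetscInt     *i,*j,*oi,*oj;            CSR row pointers and column indices, 0-based
      PetscScalar  *a,*oa;                   numerical values for the two blocks
      ... fill i,j,a with the "diagonal" entries and oi,oj,oa with the remaining entries,
          following the index conventions described in the manual page above ...
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                                            i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
      ierr = MatDestroy(&A);CHKERRQ(ierr);   the six arrays may be freed only after this call
*/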