#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes:
   Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to using inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  A->pinnedtocpu = flg;
#endif
  if (a->A) {
    ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr);
  }
  if (a->B) {
    ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr =
MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if 
(type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 
299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 
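
  /*
     Note: ld[i] counts the entries of local row i whose global column index is smaller than rstart,
     i.e. the portion of the off-diagonal block B that lies to the left of the diagonal block A.
     Each row's values arrive as one contiguous run [B-left | A | B-right], so ld[] is what lets the
     copy loop below split the incoming values between the diagonal (ad) and off-diagonal (ao) arrays.
  */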
394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 goto a_noinsert; \ 468 } \ 469 } \ 470 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 471 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 472 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 473 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 474 N = nrow1++ - 1; a->nz++; high1++; \ 475 /* shift up all the later entries in this row */ \ 476 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 477 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 478 rp1[_i] = col; \ 479 ap1[_i] = value; \ 480 
A->nonzerostate++;\ 481 a_noinsert: ; \ 482 ailen[row] = nrow1; \ 483 } 484 485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 486 { \ 487 if (col <= lastcol2) low2 = 0; \ 488 else high2 = nrow2; \ 489 lastcol2 = col; \ 490 while (high2-low2 > 5) { \ 491 t = (low2+high2)/2; \ 492 if (rp2[t] > col) high2 = t; \ 493 else low2 = t; \ 494 } \ 495 for (_i=low2; _i<high2; _i++) { \ 496 if (rp2[_i] > col) break; \ 497 if (rp2[_i] == col) { \ 498 if (addv == ADD_VALUES) { \ 499 ap2[_i] += value; \ 500 (void)PetscLogFlops(1.0); \ 501 } \ 502 else ap2[_i] = value; \ 503 goto b_noinsert; \ 504 } \ 505 } \ 506 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 507 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 508 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 509 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 510 N = nrow2++ - 1; b->nz++; high2++; \ 511 /* shift up all the later entries in this row */ \ 512 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 513 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 514 rp2[_i] = col; \ 515 ap2[_i] = value; \ 516 B->nonzerostate++; \ 517 b_noinsert: ; \ 518 bilen[row] = nrow2; \ 519 } 520 521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 522 { 523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 525 PetscErrorCode ierr; 526 PetscInt l,*garray = mat->garray,diag; 527 528 PetscFunctionBegin; 529 /* code only works for square matrices A */ 530 531 /* find size of row to the left of the diagonal part */ 532 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 533 row = row - diag; 534 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 535 if (garray[b->j[b->i[row]+l]] > diag) break; 536 } 537 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 538 539 /* diagonal part */ 540 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 541 542 /* right of diagonal part */ 543 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 PetscScalar value = 0.0; 551 PetscErrorCode ierr; 552 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 554 PetscBool roworiented = aij->roworiented; 555 556 /* Some Variables required in the macro */ 557 Mat A = aij->A; 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 560 MatScalar *aa = a->a; 561 PetscBool ignorezeroentries = a->ignorezeroentries; 562 Mat B = aij->B; 563 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 564 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 565 MatScalar *ba = b->a; 566 567 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 568 PetscInt nonew; 569 MatScalar *ap1,*ap2; 570 571 PetscFunctionBegin; 572 for (i=0; i<m; i++) { 573 if (im[i] < 0) continue; 574 #if defined(PETSC_USE_DEBUG) 575 if (im[i] >= mat->rmap->N) 
SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 576 #endif 577 if (im[i] >= rstart && im[i] < rend) { 578 row = im[i] - rstart; 579 lastcol1 = -1; 580 rp1 = aj + ai[row]; 581 ap1 = aa + ai[row]; 582 rmax1 = aimax[row]; 583 nrow1 = ailen[row]; 584 low1 = 0; 585 high1 = nrow1; 586 lastcol2 = -1; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 594 for (j=0; j<n; j++) { 595 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 596 if (in[j] >= cstart && in[j] < cend) { 597 col = in[j] - cstart; 598 nonew = a->nonew; 599 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) { 632 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 633 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 634 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 635 } 636 } else col = in[j]; 637 nonew = b->nonew; 638 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 639 } 640 } 641 } else { 642 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 643 if (!aij->donotstash) { 644 mat->assembled = PETSC_FALSE; 645 if (roworiented) { 646 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 647 } else { 648 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
  Mat        A    = aij->A; /* diagonal part of the matrix */
  Mat        B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ*)mat->data;
  Mat        A     = aij->A; /* diagonal part of the matrix */
  Mat        B     = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen,*aj = a->j;
  PetscInt   *bilen = b->ilen,*bj = b->j;
  PetscInt   am     = aij->A->rmap->n,j;
  PetscInt   *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 716 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 717 PetscScalar *aa = a->a,*ba = b->a; 718 719 PetscFunctionBegin; 720 /* Iterate over all rows of the matrix */ 721 for (j=0; j<am; j++) { 722 dnz_row = onz_row = 0; 723 rowstart_offd = full_offd_i[j]; 724 rowstart_diag = full_diag_i[j]; 725 /* Iterate over all non-zero columns of the current row */ 726 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 727 /* If column is in the diagonal */ 728 if (mat_j[col] >= cstart && mat_j[col] < cend) { 729 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 730 aa[rowstart_diag+dnz_row] = mat_a[col]; 731 dnz_row++; 732 } else { /* off-diagonal entries */ 733 bj[rowstart_offd+onz_row] = mat_j[col]; 734 ba[rowstart_offd+onz_row] = mat_a[col]; 735 onz_row++; 736 } 737 } 738 ailen[j] = dnz_row; 739 bilen[j] = onz_row; 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 750 751 PetscFunctionBegin; 752 for (i=0; i<m; i++) { 753 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 754 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 755 if (idxm[i] >= rstart && idxm[i] < rend) { 756 row = idxm[i] - rstart; 757 for (j=0; j<n; j++) { 758 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 759 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 760 if (idxn[j] >= cstart && idxn[j] < cend) { 761 col = idxn[j] - cstart; 762 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 763 } else { 764 if (!aij->colmap) { 765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 766 } 767 #if defined(PETSC_USE_CTABLE) 768 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 769 col--; 770 #else 771 col = aij->colmap[idxn[j]] - 1; 772 #endif 773 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 774 else { 775 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } 777 } 778 } 779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 785 786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 789 PetscErrorCode ierr; 790 PetscInt nstash,reallocs; 791 792 PetscFunctionBegin; 793 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 794 795 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 796 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 797 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 805 PetscErrorCode ierr; 806 PetscMPIInt n; 807 PetscInt i,j,rstart,ncols,flg; 808 PetscInt *row,*col; 809 
PetscBool other_disassembled; 810 PetscScalar *val; 811 812 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 813 814 PetscFunctionBegin; 815 if (!aij->donotstash && !mat->nooffprocentries) { 816 while (1) { 817 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 818 if (!flg) break; 819 820 for (i=0; i<n; ) { 821 /* Now identify the consecutive vals belonging to the same row */ 822 for (j=i,rstart=row[j]; j<n; j++) { 823 if (row[j] != rstart) break; 824 } 825 if (j < n) ncols = j-i; 826 else ncols = n-i; 827 /* Now assemble all these values with a single function call */ 828 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 829 830 i = j; 831 } 832 } 833 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 834 } 835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 836 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 837 #endif 838 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 840 841 /* determine if any processor has disassembled, if so we must 842 also disassemble ourself, in order that we may reassemble. */ 843 /* 844 if nonzero structure of submatrix B cannot change then we know that 845 no processor disassembled thus we can skip this stuff 846 */ 847 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 848 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 849 if (mat->was_assembled && !other_disassembled) { 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 852 #endif 853 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 854 } 855 } 856 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 857 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 858 } 859 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 862 #endif 863 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 865 866 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 867 868 aij->rowvalues = 0; 869 870 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 871 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 875 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 876 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 877 } 878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 879 mat->offloadmask = PETSC_OFFLOAD_BOTH; 880 #endif 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 885 { 886 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 891 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroRows_MPIAIJ(Mat 
A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 898 PetscObjectState sA, sB; 899 PetscInt *lrows; 900 PetscInt r, len; 901 PetscBool cong, lch, gch; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 /* get locally owned rows */ 906 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 907 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 908 /* fix right hand side if needed */ 909 if (x && b) { 910 const PetscScalar *xx; 911 PetscScalar *bb; 912 913 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 914 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 917 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 919 } 920 921 sA = mat->A->nonzerostate; 922 sB = mat->B->nonzerostate; 923 924 if (diag != 0.0 && cong) { 925 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 926 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 927 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 928 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 929 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 930 PetscInt nnwA, nnwB; 931 PetscBool nnzA, nnzB; 932 933 nnwA = aijA->nonew; 934 nnwB = aijB->nonew; 935 nnzA = aijA->keepnonzeropattern; 936 nnzB = aijB->keepnonzeropattern; 937 if (!nnzA) { 938 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 939 aijA->nonew = 0; 940 } 941 if (!nnzB) { 942 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 943 aijB->nonew = 0; 944 } 945 /* Must zero here before the next loop */ 946 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 948 for (r = 0; r < len; ++r) { 949 const PetscInt row = lrows[r] + A->rmap->rstart; 950 if (row >= A->cmap->N) continue; 951 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 952 } 953 aijA->nonew = nnwA; 954 aijB->nonew = nnwB; 955 } else { 956 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 957 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 } 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 963 /* reduce nonzerostate */ 964 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 965 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 966 if (gch) A->nonzerostate++; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 971 { 972 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscMPIInt n = A->rmap->n; 975 PetscInt i,j,r,m,len = 0; 976 PetscInt *lrows,*owners = A->rmap->range; 977 PetscMPIInt p = 0; 978 PetscSFNode *rrows; 979 PetscSF sf; 980 const PetscScalar *xx; 981 PetscScalar *bb,*mask; 982 Vec xmask,lmask; 983 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 
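
  /*
     Strategy: (1) use a PetscSF to translate the supplied global row indices into locally owned rows,
     (2) zero those rows/columns of the diagonal block with MatZeroRowsColumns() on l->A, and
     (3) scatter a 0/1 mask of the zeroed rows into the ghost (lvec) ordering so that the matching
     columns of the off-diagonal block B can be zeroed, folding their contribution into b when given.
  */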
984 const PetscInt *aj, *ii,*ridx; 985 PetscScalar *aa; 986 987 PetscFunctionBegin; 988 /* Create SF where leaves are input rows and roots are owned rows */ 989 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 990 for (r = 0; r < n; ++r) lrows[r] = -1; 991 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 992 for (r = 0; r < N; ++r) { 993 const PetscInt idx = rows[r]; 994 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 995 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 996 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 997 } 998 rrows[r].rank = p; 999 rrows[r].index = rows[r] - owners[p]; 1000 } 1001 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1002 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1003 /* Collect flags for rows to be zeroed */ 1004 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1005 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1006 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1007 /* Compress and put in row numbers */ 1008 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1009 /* zero diagonal part of matrix */ 1010 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1011 /* handle off diagonal part of matrix */ 1012 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1013 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1014 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1015 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1016 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1017 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1019 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1020 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1021 PetscBool cong; 1022 1023 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1024 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1025 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1028 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1029 } 1030 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1031 /* remove zeroed rows of off diagonal matrix */ 1032 ii = aij->i; 1033 for (i=0; i<len; i++) { 1034 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1035 } 1036 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1037 if (aij->compressedrow.use) { 1038 m = aij->compressedrow.nrows; 1039 ii = aij->compressedrow.i; 1040 ridx = aij->compressedrow.rindex; 1041 for (i=0; i<m; i++) { 1042 n = ii[i+1] - ii[i]; 1043 aj = aij->j + ii[i]; 1044 aa = aij->a + ii[i]; 1045 1046 for (j=0; j<n; j++) { 1047 if (PetscAbsScalar(mask[*aj])) { 1048 if (b) bb[*ridx] -= *aa*xx[*aj]; 1049 *aa = 0.0; 1050 } 1051 aa++; 1052 aj++; 1053 } 1054 ridx++; 1055 } 1056 } else { /* do not use compressed row format */ 1057 m = l->B->rmap->n; 1058 for (i=0; i<m; i++) { 1059 n = ii[i+1] - ii[i]; 1060 aj = aij->j + ii[i]; 1061 aa = aij->a + ii[i]; 1062 for (j=0; j<n; j++) { 1063 if (PetscAbsScalar(mask[*aj])) { 
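          /* this column has been zeroed: move its contribution to the right-hand side (if given) and zero the entry */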
1064 if (b) bb[i] -= *aa*xx[*aj]; 1065 *aa = 0.0; 1066 } 1067 aa++; 1068 aj++; 1069 } 1070 } 1071 } 1072 if (x && b) { 1073 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1074 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1075 } 1076 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1077 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1078 ierr = PetscFree(lrows);CHKERRQ(ierr); 1079 1080 /* only change matrix nonzero state if pattern was allowed to be changed */ 1081 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1082 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1083 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1084 } 1085 PetscFunctionReturn(0); 1086 } 1087 1088 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1089 { 1090 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1091 PetscErrorCode ierr; 1092 PetscInt nt; 1093 VecScatter Mvctx = a->Mvctx; 1094 1095 PetscFunctionBegin; 1096 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1097 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1098 1099 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1100 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1101 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1102 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1107 { 1108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1109 PetscErrorCode ierr; 1110 1111 PetscFunctionBegin; 1112 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1117 { 1118 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1119 PetscErrorCode ierr; 1120 VecScatter Mvctx = a->Mvctx; 1121 1122 PetscFunctionBegin; 1123 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1124 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1125 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1126 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1127 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1128 PetscFunctionReturn(0); 1129 } 1130 1131 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1132 { 1133 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1134 PetscErrorCode ierr; 1135 1136 PetscFunctionBegin; 1137 /* do nondiagonal part */ 1138 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1139 /* do local part */ 1140 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1141 /* add partial results together */ 1142 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1144 PetscFunctionReturn(0); 1145 } 1146 1147 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1148 { 1149 MPI_Comm comm; 1150 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1151 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1152 IS Me,Notme; 1153 PetscErrorCode ierr; 1154 PetscInt M,N,first,last,*notme,i; 1155 PetscBool lf; 1156 PetscMPIInt size; 1157 1158 PetscFunctionBegin; 1159 /* Easy test: symmetric diagonal block */ 1160 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1161 ierr = 
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1162 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1163 if (!*f) PetscFunctionReturn(0); 1164 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1165 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1166 if (size == 1) PetscFunctionReturn(0); 1167 1168 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1169 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1170 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1171 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1172 for (i=0; i<first; i++) notme[i] = i; 1173 for (i=last; i<M; i++) notme[i-last+first] = i; 1174 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1175 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1176 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1177 Aoff = Aoffs[0]; 1178 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1179 Boff = Boffs[0]; 1180 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1182 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1184 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1185 ierr = PetscFree(notme);CHKERRQ(ierr); 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1190 { 1191 PetscErrorCode ierr; 1192 1193 PetscFunctionBegin; 1194 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1195 PetscFunctionReturn(0); 1196 } 1197 1198 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1199 { 1200 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1201 PetscErrorCode ierr; 1202 1203 PetscFunctionBegin; 1204 /* do nondiagonal part */ 1205 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1206 /* do local part */ 1207 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1208 /* add partial results together */ 1209 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1211 PetscFunctionReturn(0); 1212 } 1213 1214 /* 1215 This only works correctly for square matrices where the subblock A->A is the 1216 diagonal block 1217 */ 1218 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1219 { 1220 PetscErrorCode ierr; 1221 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1222 1223 PetscFunctionBegin; 1224 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1225 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1226 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1227 PetscFunctionReturn(0); 1228 } 1229 1230 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1231 { 1232 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1233 PetscErrorCode ierr; 1234 1235 PetscFunctionBegin; 1236 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1237 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1242 { 1243 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1244 PetscErrorCode ierr; 1245 1246 PetscFunctionBegin; 1247 #if defined(PETSC_USE_LOG) 1248 
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1249 #endif 1250 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1251 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1252 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1253 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1254 #if defined(PETSC_USE_CTABLE) 1255 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1256 #else 1257 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1258 #endif 1259 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1260 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1261 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1262 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1263 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1264 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1265 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1266 1267 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1275 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr); 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1277 #if defined(PETSC_HAVE_ELEMENTAL) 1278 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1279 #endif 1280 #if defined(PETSC_HAVE_HYPRE) 1281 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1283 #endif 1284 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1285 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1286 PetscFunctionReturn(0); 1287 } 1288 1289 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1290 { 1291 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1292 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1293 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1294 PetscErrorCode ierr; 1295 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1296 int fd; 1297 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1298 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1299 PetscScalar *column_values; 1300 PetscInt message_count,flowcontrolcount; 1301 FILE *file; 1302 1303 PetscFunctionBegin; 1304 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1305 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1306 nz = A->nz + B->nz; 1307 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1308 if (!rank) { 1309 header[0] = MAT_FILE_CLASSID; 1310 header[1] = mat->rmap->N; 1311 header[2] = mat->cmap->N; 1312 1313 
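    /* header[3] = global number of nonzeros, summed from all ranks onto rank 0 by the reduction below */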
ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much buffer space as the process with the most nonzeros */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr =
PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1376 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1377 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1378 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1379 } 1380 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1381 1382 /* load up the local column values */ 1383 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1384 cnt = 0; 1385 for (i=0; i<mat->rmap->n; i++) { 1386 for (j=B->i[i]; j<B->i[i+1]; j++) { 1387 if (garray[B->j[j]] > cstart) break; 1388 column_values[cnt++] = B->a[j]; 1389 } 1390 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1391 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1392 } 1393 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1394 1395 /* store the column values to the file */ 1396 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1397 if (!rank) { 1398 MPI_Status status; 1399 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1400 for (i=1; i<size; i++) { 1401 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1402 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1403 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1404 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1405 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1406 } 1407 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1408 } else { 1409 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1410 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1411 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1412 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1413 } 1414 ierr = PetscFree(column_values);CHKERRQ(ierr); 1415 1416 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1417 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1418 PetscFunctionReturn(0); 1419 } 1420 1421 #include <petscdraw.h> 1422 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1423 { 1424 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1425 PetscErrorCode ierr; 1426 PetscMPIInt rank = aij->rank,size = aij->size; 1427 PetscBool isdraw,iascii,isbinary; 1428 PetscViewer sviewer; 1429 PetscViewerFormat format; 1430 1431 PetscFunctionBegin; 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1433 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1434 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1435 if (iascii) { 1436 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1437 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1438 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1439 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1440 ierr = 
MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1441 for (i=0; i<(PetscInt)size; i++) { 1442 nmax = PetscMax(nmax,nz[i]); 1443 nmin = PetscMin(nmin,nz[i]); 1444 navg += nz[i]; 1445 } 1446 ierr = PetscFree(nz);CHKERRQ(ierr); 1447 navg = navg/size; 1448 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1449 PetscFunctionReturn(0); 1450 } 1451 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1452 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1453 MatInfo info; 1454 PetscBool inodes; 1455 1456 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1457 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1458 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1459 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1460 if (!inodes) { 1461 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1462 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1463 } else { 1464 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1465 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1466 } 1467 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1469 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1471 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1472 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1473 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1474 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1475 PetscFunctionReturn(0); 1476 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1477 PetscInt inodecount,inodelimit,*inodes; 1478 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1479 if (inodes) { 1480 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1481 } else { 1482 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1483 } 1484 PetscFunctionReturn(0); 1485 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1486 PetscFunctionReturn(0); 1487 } 1488 } else if (isbinary) { 1489 if (size == 1) { 1490 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1491 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1492 } else { 1493 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1494 } 1495 PetscFunctionReturn(0); 1496 } else if (iascii && size == 1) { 1497 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1498 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1499 PetscFunctionReturn(0); 1500 } else if (isdraw) { 1501 PetscDraw draw; 1502 PetscBool isnull; 1503 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1504 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1505 if (isnull) PetscFunctionReturn(0); 1506 } 1507 1508 { /* assemble the entire 
matrix onto first processor */ 1509 Mat A = NULL, Av; 1510 IS isrow,iscol; 1511 1512 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1513 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1514 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1515 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1516 /* The commented code uses MatCreateSubMatrices instead */ 1517 /* 1518 Mat *AA, A = NULL, Av; 1519 IS isrow,iscol; 1520 1521 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1522 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1523 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1524 if (!rank) { 1525 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1526 A = AA[0]; 1527 Av = AA[0]; 1528 } 1529 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1530 */ 1531 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1532 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1533 /* 1534 Everyone has to call to draw the matrix since the graphics waits are 1535 synchronized across all processors that share the PetscDraw object 1536 */ 1537 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1538 if (!rank) { 1539 if (((PetscObject)mat)->name) { 1540 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1541 } 1542 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1543 } 1544 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1545 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1546 ierr = MatDestroy(&A);CHKERRQ(ierr); 1547 } 1548 PetscFunctionReturn(0); 1549 } 1550 1551 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1552 { 1553 PetscErrorCode ierr; 1554 PetscBool iascii,isdraw,issocket,isbinary; 1555 1556 PetscFunctionBegin; 1557 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1559 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1560 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1561 if (iascii || isdraw || isbinary || issocket) { 1562 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1563 } 1564 PetscFunctionReturn(0); 1565 } 1566 1567 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1568 { 1569 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1570 PetscErrorCode ierr; 1571 Vec bb1 = 0; 1572 PetscBool hasop; 1573 1574 PetscFunctionBegin; 1575 if (flag == SOR_APPLY_UPPER) { 1576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1577 PetscFunctionReturn(0); 1578 } 1579 1580 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1581 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1582 } 1583 1584 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1585 if (flag & SOR_ZERO_INITIAL_GUESS) { 1586 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1587 its--; 1588 } 1589 1590 while (its--) { 1591 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1592 ierr = 
VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1593 1594 /* update rhs: bb1 = bb - B*x */ 1595 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1596 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1597 1598 /* local sweep */ 1599 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1600 } 1601 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1602 if (flag & SOR_ZERO_INITIAL_GUESS) { 1603 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1604 its--; 1605 } 1606 while (its--) { 1607 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1608 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1609 1610 /* update rhs: bb1 = bb - B*x */ 1611 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1612 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1613 1614 /* local sweep */ 1615 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1616 } 1617 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1618 if (flag & SOR_ZERO_INITIAL_GUESS) { 1619 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1620 its--; 1621 } 1622 while (its--) { 1623 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1624 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1625 1626 /* update rhs: bb1 = bb - B*x */ 1627 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1628 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1629 1630 /* local sweep */ 1631 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1632 } 1633 } else if (flag & SOR_EISENSTAT) { 1634 Vec xx1; 1635 1636 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1637 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1638 1639 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1640 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1641 if (!mat->diag) { 1642 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1643 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1644 } 1645 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1646 if (hasop) { 1647 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1648 } else { 1649 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1650 } 1651 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1652 1653 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1654 1655 /* local sweep */ 1656 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1657 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1658 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1659 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1660 1661 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1662 1663 matin->factorerrortype = mat->A->factorerrortype; 1664 PetscFunctionReturn(0); 1665 } 1666 1667 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1668 { 1669 Mat aA,aB,Aperm; 1670 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1671 PetscScalar *aa,*ba; 1672 PetscInt 
i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1673 PetscSF rowsf,sf; 1674 IS parcolp = NULL; 1675 PetscBool done; 1676 PetscErrorCode ierr; 1677 1678 PetscFunctionBegin; 1679 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1680 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1681 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1682 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1683 1684 /* Invert row permutation to find out where my rows should go */ 1685 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1686 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1687 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1688 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1689 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1690 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1691 1692 /* Invert column permutation to find out where my columns should go */ 1693 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1694 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1695 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1696 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1697 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1698 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1699 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1700 1701 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1702 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1703 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1704 1705 /* Find out where my gcols should go */ 1706 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1707 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1708 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1709 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1710 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1711 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1712 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1713 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1714 1715 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1716 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1717 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1718 for (i=0; i<m; i++) { 1719 PetscInt row = rdest[i]; 1720 PetscMPIInt rowner; 1721 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1722 for (j=ai[i]; j<ai[i+1]; j++) { 1723 PetscInt col = cdest[aj[j]]; 1724 PetscMPIInt cowner; 1725 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1726 if (rowner == cowner) dnnz[i]++; 1727 else onnz[i]++; 1728 } 1729 for (j=bi[i]; j<bi[i+1]; j++) { 1730 PetscInt col = gcdest[bj[j]]; 1731 PetscMPIInt cowner; 1732 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1733 if (rowner == cowner) dnnz[i]++; 1734 else onnz[i]++; 1735 } 1736 } 1737 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1738 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1739 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1740 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1741 ierr = 
PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1742 1743 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1744 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1745 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1746 for (i=0; i<m; i++) { 1747 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1748 PetscInt j0,rowlen; 1749 rowlen = ai[i+1] - ai[i]; 1750 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1751 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1752 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1753 } 1754 rowlen = bi[i+1] - bi[i]; 1755 for (j0=j=0; j<rowlen; j0=j) { 1756 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1757 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1758 } 1759 } 1760 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1761 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1762 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1763 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1764 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1765 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1766 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1767 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1768 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1769 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1770 *B = Aperm; 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1775 { 1776 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1777 PetscErrorCode ierr; 1778 1779 PetscFunctionBegin; 1780 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1781 if (ghosts) *ghosts = aij->garray; 1782 PetscFunctionReturn(0); 1783 } 1784 1785 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1786 { 1787 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1788 Mat A = mat->A,B = mat->B; 1789 PetscErrorCode ierr; 1790 PetscLogDouble isend[5],irecv[5]; 1791 1792 PetscFunctionBegin; 1793 info->block_size = 1.0; 1794 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1795 1796 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1797 isend[3] = info->memory; isend[4] = info->mallocs; 1798 1799 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1800 1801 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1802 isend[3] += info->memory; isend[4] += info->mallocs; 1803 if (flag == MAT_LOCAL) { 1804 info->nz_used = isend[0]; 1805 info->nz_allocated = isend[1]; 1806 info->nz_unneeded = isend[2]; 1807 info->memory = isend[3]; 1808 info->mallocs = isend[4]; 1809 } else if (flag == MAT_GLOBAL_MAX) { 1810 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1811 1812 info->nz_used = irecv[0]; 1813 info->nz_allocated = irecv[1]; 1814 info->nz_unneeded = irecv[2]; 1815 info->memory = irecv[3]; 1816 info->mallocs = irecv[4]; 1817 } else if (flag == MAT_GLOBAL_SUM) { 1818 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1819 1820 info->nz_used = irecv[0]; 1821 info->nz_allocated = irecv[1]; 1822 info->nz_unneeded = 
irecv[2]; 1823 info->memory = irecv[3]; 1824 info->mallocs = irecv[4]; 1825 } 1826 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1827 info->fill_ratio_needed = 0; 1828 info->factor_mallocs = 0; 1829 PetscFunctionReturn(0); 1830 } 1831 1832 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1833 { 1834 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1835 PetscErrorCode ierr; 1836 1837 PetscFunctionBegin; 1838 switch (op) { 1839 case MAT_NEW_NONZERO_LOCATIONS: 1840 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1841 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1842 case MAT_KEEP_NONZERO_PATTERN: 1843 case MAT_NEW_NONZERO_LOCATION_ERR: 1844 case MAT_USE_INODES: 1845 case MAT_IGNORE_ZERO_ENTRIES: 1846 MatCheckPreallocated(A,1); 1847 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1848 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1849 break; 1850 case MAT_ROW_ORIENTED: 1851 MatCheckPreallocated(A,1); 1852 a->roworiented = flg; 1853 1854 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1855 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1856 break; 1857 case MAT_NEW_DIAGONALS: 1858 case MAT_SORTED_FULL: 1859 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1860 break; 1861 case MAT_IGNORE_OFF_PROC_ENTRIES: 1862 a->donotstash = flg; 1863 break; 1864 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1865 case MAT_SPD: 1866 case MAT_SYMMETRIC: 1867 case MAT_STRUCTURALLY_SYMMETRIC: 1868 case MAT_HERMITIAN: 1869 case MAT_SYMMETRY_ETERNAL: 1870 break; 1871 case MAT_SUBMAT_SINGLEIS: 1872 A->submat_singleis = flg; 1873 break; 1874 case MAT_STRUCTURE_ONLY: 1875 /* The option is handled directly by MatSetOption() */ 1876 break; 1877 default: 1878 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1879 } 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1884 { 1885 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1886 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1887 PetscErrorCode ierr; 1888 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1889 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1890 PetscInt *cmap,*idx_p; 1891 1892 PetscFunctionBegin; 1893 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1894 mat->getrowactive = PETSC_TRUE; 1895 1896 if (!mat->rowvalues && (idx || v)) { 1897 /* 1898 allocate enough space to hold information from the longest row. 
1899 */ 1900 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1901 PetscInt max = 1,tmp; 1902 for (i=0; i<matin->rmap->n; i++) { 1903 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1904 if (max < tmp) max = tmp; 1905 } 1906 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1907 } 1908 1909 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1910 lrow = row - rstart; 1911 1912 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1913 if (!v) {pvA = 0; pvB = 0;} 1914 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1915 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1916 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1917 nztot = nzA + nzB; 1918 1919 cmap = mat->garray; 1920 if (v || idx) { 1921 if (nztot) { 1922 /* Sort by increasing column numbers, assuming A and B already sorted */ 1923 PetscInt imark = -1; 1924 if (v) { 1925 *v = v_p = mat->rowvalues; 1926 for (i=0; i<nzB; i++) { 1927 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1928 else break; 1929 } 1930 imark = i; 1931 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1932 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1933 } 1934 if (idx) { 1935 *idx = idx_p = mat->rowindices; 1936 if (imark > -1) { 1937 for (i=0; i<imark; i++) { 1938 idx_p[i] = cmap[cworkB[i]]; 1939 } 1940 } else { 1941 for (i=0; i<nzB; i++) { 1942 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1943 else break; 1944 } 1945 imark = i; 1946 } 1947 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1948 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1949 } 1950 } else { 1951 if (idx) *idx = 0; 1952 if (v) *v = 0; 1953 } 1954 } 1955 *nz = nztot; 1956 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1957 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1958 PetscFunctionReturn(0); 1959 } 1960 1961 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1962 { 1963 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1964 1965 PetscFunctionBegin; 1966 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1967 aij->getrowactive = PETSC_FALSE; 1968 PetscFunctionReturn(0); 1969 } 1970 1971 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1972 { 1973 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1974 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1975 PetscErrorCode ierr; 1976 PetscInt i,j,cstart = mat->cmap->rstart; 1977 PetscReal sum = 0.0; 1978 MatScalar *v; 1979 1980 PetscFunctionBegin; 1981 if (aij->size == 1) { 1982 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1983 } else { 1984 if (type == NORM_FROBENIUS) { 1985 v = amat->a; 1986 for (i=0; i<amat->nz; i++) { 1987 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1988 } 1989 v = bmat->a; 1990 for (i=0; i<bmat->nz; i++) { 1991 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1992 } 1993 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1994 *norm = PetscSqrtReal(*norm); 1995 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1996 } else if (type == NORM_1) { /* max column norm */ 1997 PetscReal *tmp,*tmp2; 1998 PetscInt *jj,*garray = aij->garray; 1999 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 2000 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 2001 *norm = 0.0; 
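      /*
         Summary of what this routine computes, with a hypothetical usage sketch:
           NORM_FROBENIUS - sqrt of the global sum of |a_ij|^2; the local sums over the diagonal (A)
                            and off-diagonal (B) blocks are combined with an MPIU_SUM reduction;
           NORM_1         - max over columns j of sum_i |a_ij|; each process accumulates its entries
                            into a work array of global column length (below), the arrays are summed
                            across processes, and the largest entry is taken;
           NORM_INFINITY  - max over rows i of sum_j |a_ij|; row sums are purely local, so a single
                            MPIU_MAX reduction suffices.
         For example, for some assembled MATMPIAIJ matrix C (hypothetical name):
           PetscReal nrm;
           ierr = MatNorm(C,NORM_1,&nrm);CHKERRQ(ierr);
      */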
2002 v = amat->a; jj = amat->j; 2003 for (j=0; j<amat->nz; j++) { 2004 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2005 } 2006 v = bmat->a; jj = bmat->j; 2007 for (j=0; j<bmat->nz; j++) { 2008 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2009 } 2010 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2011 for (j=0; j<mat->cmap->N; j++) { 2012 if (tmp2[j] > *norm) *norm = tmp2[j]; 2013 } 2014 ierr = PetscFree(tmp);CHKERRQ(ierr); 2015 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2016 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2017 } else if (type == NORM_INFINITY) { /* max row norm */ 2018 PetscReal ntemp = 0.0; 2019 for (j=0; j<aij->A->rmap->n; j++) { 2020 v = amat->a + amat->i[j]; 2021 sum = 0.0; 2022 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2023 sum += PetscAbsScalar(*v); v++; 2024 } 2025 v = bmat->a + bmat->i[j]; 2026 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2027 sum += PetscAbsScalar(*v); v++; 2028 } 2029 if (sum > ntemp) ntemp = sum; 2030 } 2031 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2032 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2033 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2034 } 2035 PetscFunctionReturn(0); 2036 } 2037 2038 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2039 { 2040 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2041 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2042 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2043 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2044 PetscErrorCode ierr; 2045 Mat B,A_diag,*B_diag; 2046 const MatScalar *array; 2047 2048 PetscFunctionBegin; 2049 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2050 ai = Aloc->i; aj = Aloc->j; 2051 bi = Bloc->i; bj = Bloc->j; 2052 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2053 PetscInt *d_nnz,*g_nnz,*o_nnz; 2054 PetscSFNode *oloc; 2055 PETSC_UNUSED PetscSF sf; 2056 2057 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2058 /* compute d_nnz for preallocation */ 2059 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2060 for (i=0; i<ai[ma]; i++) { 2061 d_nnz[aj[i]]++; 2062 } 2063 /* compute local off-diagonal contributions */ 2064 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2065 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2066 /* map those to global */ 2067 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2068 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2069 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2070 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2071 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2072 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2073 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2074 2075 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2076 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2077 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2078 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2079 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2080 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2081 } else { 2082 B = *matout; 2083 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2084 } 2085 2086 b = (Mat_MPIAIJ*)B->data; 2087 A_diag = a->A; 2088 B_diag = &b->A; 2089 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2090 A_diag_ncol = A_diag->cmap->N; 2091 B_diag_ilen = sub_B_diag->ilen; 2092 B_diag_i = sub_B_diag->i; 2093 2094 /* Set ilen for diagonal of B */ 2095 for (i=0; i<A_diag_ncol; i++) { 2096 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2097 } 2098 2099 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2100 very quickly (=without using MatSetValues), because all writes are local. */ 2101 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2102 2103 /* copy over the B part */ 2104 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2105 array = Bloc->a; 2106 row = A->rmap->rstart; 2107 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2108 cols_tmp = cols; 2109 for (i=0; i<mb; i++) { 2110 ncol = bi[i+1]-bi[i]; 2111 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2112 row++; 2113 array += ncol; cols_tmp += ncol; 2114 } 2115 ierr = PetscFree(cols);CHKERRQ(ierr); 2116 2117 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2118 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2119 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2120 *matout = B; 2121 } else { 2122 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2128 { 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2130 Mat a = aij->A,b = aij->B; 2131 PetscErrorCode ierr; 2132 PetscInt s1,s2,s3; 2133 2134 PetscFunctionBegin; 2135 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2136 if (rr) { 2137 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2138 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2139 /* Overlap communication with computation. 
*/ 2140 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2141 } 2142 if (ll) { 2143 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2144 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2145 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2146 } 2147 /* scale the diagonal block */ 2148 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2149 2150 if (rr) { 2151 /* Do a scatter end and then right scale the off-diagonal block */ 2152 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2153 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2154 } 2155 PetscFunctionReturn(0); 2156 } 2157 2158 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2169 { 2170 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2171 Mat a,b,c,d; 2172 PetscBool flg; 2173 PetscErrorCode ierr; 2174 2175 PetscFunctionBegin; 2176 a = matA->A; b = matA->B; 2177 c = matB->A; d = matB->B; 2178 2179 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2180 if (flg) { 2181 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2182 } 2183 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2188 { 2189 PetscErrorCode ierr; 2190 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2191 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2192 2193 PetscFunctionBegin; 2194 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2195 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2196 /* because of the column compression in the off-processor part of the matrix a->B, 2197 the number of columns in a->B and b->B may be different, hence we cannot call 2198 the MatCopy() directly on the two parts. If need be, we can provide a more 2199 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2200 then copying the submatrices */ 2201 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2202 } else { 2203 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2204 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2205 } 2206 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2207 PetscFunctionReturn(0); 2208 } 2209 2210 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2211 { 2212 PetscErrorCode ierr; 2213 2214 PetscFunctionBegin; 2215 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2216 PetscFunctionReturn(0); 2217 } 2218 2219 /* 2220 Computes the number of nonzeros per row needed for preallocation when X and Y 2221 have different nonzero structure. 
2222 */ 2223 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2224 { 2225 PetscInt i,j,k,nzx,nzy; 2226 2227 PetscFunctionBegin; 2228 /* Set the number of nonzeros in the new matrix */ 2229 for (i=0; i<m; i++) { 2230 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2231 nzx = xi[i+1] - xi[i]; 2232 nzy = yi[i+1] - yi[i]; 2233 nnz[i] = 0; 2234 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2235 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2236 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2237 nnz[i]++; 2238 } 2239 for (; k<nzy; k++) nnz[i]++; 2240 } 2241 PetscFunctionReturn(0); 2242 } 2243 2244 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2245 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2246 { 2247 PetscErrorCode ierr; 2248 PetscInt m = Y->rmap->N; 2249 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2250 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2251 2252 PetscFunctionBegin; 2253 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2254 PetscFunctionReturn(0); 2255 } 2256 2257 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2258 { 2259 PetscErrorCode ierr; 2260 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2261 PetscBLASInt bnz,one=1; 2262 Mat_SeqAIJ *x,*y; 2263 2264 PetscFunctionBegin; 2265 if (str == SAME_NONZERO_PATTERN) { 2266 PetscScalar alpha = a; 2267 x = (Mat_SeqAIJ*)xx->A->data; 2268 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2269 y = (Mat_SeqAIJ*)yy->A->data; 2270 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2271 x = (Mat_SeqAIJ*)xx->B->data; 2272 y = (Mat_SeqAIJ*)yy->B->data; 2273 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2274 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2275 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2276 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2277 will be updated */ 2278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2279 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2280 Y->offloadmask = PETSC_OFFLOAD_CPU; 2281 } 2282 #endif 2283 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2284 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2285 } else { 2286 Mat B; 2287 PetscInt *nnz_d,*nnz_o; 2288 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2289 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2290 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2291 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2292 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2293 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2294 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2295 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2296 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2297 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2298 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2299 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2300 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 
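  /*
     Worked example of the merge count done by MatAXPYGetPreallocation_MPIX_private(), used above to
     compute nnz_d and nnz_o: for each row the (sorted) global column lists of X and Y are merged,
     counting every distinct column exactly once.  With, say,
       X row columns (global): {0, 3, 7}
       Y row columns (global): {1, 3}
     the union is {0, 1, 3, 7}, so nnz[row] = 4 entries are preallocated for that row of Y + a*X.
  */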
2301     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2302   }
2303   PetscFunctionReturn(0);
2304 }
2305
2306 extern PetscErrorCode MatConjugate_SeqAIJ(Mat);
2307
2308 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2309 {
2310 #if defined(PETSC_USE_COMPLEX)
2311   PetscErrorCode ierr;
2312   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
2313
2314   PetscFunctionBegin;
2315   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2316   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2317 #else
2318   PetscFunctionBegin;
2319 #endif
2320   PetscFunctionReturn(0);
2321 }
2322
2323 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2324 {
2325   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2326   PetscErrorCode ierr;
2327
2328   PetscFunctionBegin;
2329   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2330   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2331   PetscFunctionReturn(0);
2332 }
2333
2334 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2335 {
2336   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2337   PetscErrorCode ierr;
2338
2339   PetscFunctionBegin;
2340   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2341   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2342   PetscFunctionReturn(0);
2343 }
2344
2345 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2346 {
2347   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2348   PetscErrorCode ierr;
2349   PetscInt i,*idxb = 0;
2350   PetscScalar *va,*vb;
2351   Vec vtmp;
2352
2353   PetscFunctionBegin;
2354   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
2355   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2356   if (idx) {
2357     for (i=0; i<A->rmap->n; i++) {
2358       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2359     }
2360   }
2361
2362   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2363   if (idx) {
2364     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2365   }
2366   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2367   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2368
2369   for (i=0; i<A->rmap->n; i++) {
2370     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2371       va[i] = vb[i];
2372       if (idx) idx[i] = a->garray[idxb[i]];
2373     }
2374   }
2375
2376   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2377   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2378   ierr = PetscFree(idxb);CHKERRQ(ierr);
2379   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382
2383 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2384 {
2385   Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data;
2386   PetscErrorCode ierr;
2387   PetscInt i,*idxb = 0;
2388   PetscScalar *va,*vb;
2389   Vec vtmp;
2390
2391   PetscFunctionBegin;
2392   ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr);
2393   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2394   if (idx) {
2395     for (i=0; i<A->rmap->n; i++) { /* loop over the local rows, as in MatGetRowMaxAbs_MPIAIJ() above; v and idx hold one entry per local row */
2396       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2397     }
2398   }
2399
2400   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
2401   if (idx) {
2402     ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);
2403   }
2404   ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
2405   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
2406
2407   for (i=0; i<A->rmap->n; i++) {
2408     if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) {
2409       va[i] = vb[i];
2410       if (idx) idx[i] = a->garray[idxb[i]];
2411     }
2412   }
2413
2414   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2415   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
2416   ierr = PetscFree(idxb);CHKERRQ(ierr);
2417   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420
2421 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2422 {
2423   Mat_MPIAIJ *mat = (Mat_MPIAIJ*)
A->data;
2424   PetscInt n = A->rmap->n;
2425   PetscInt cstart = A->cmap->rstart;
2426   PetscInt *cmap = mat->garray;
2427   PetscInt *diagIdx, *offdiagIdx;
2428   Vec diagV, offdiagV;
2429   PetscScalar *a, *diagA, *offdiagA;
2430   PetscInt r;
2431   PetscErrorCode ierr;
2432
2433   PetscFunctionBegin;
2434   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2435   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); /* sequential work vectors; use PETSC_COMM_SELF as in MatGetRowMax_MPIAIJ() below */
2436   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2437   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2438   ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2439   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2440   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2441   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2442   for (r = 0; r < n; ++r) {
2443     if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) {
2444       a[r]   = diagA[r];
2445       idx[r] = cstart + diagIdx[r];
2446     } else {
2447       a[r]   = offdiagA[r];
2448       idx[r] = cmap[offdiagIdx[r]];
2449     }
2450   }
2451   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2452   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2453   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2454   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2455   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2456   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2457   PetscFunctionReturn(0);
2458 }
2459
2460 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2461 {
2462   Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data;
2463   PetscInt n = A->rmap->n;
2464   PetscInt cstart = A->cmap->rstart;
2465   PetscInt *cmap = mat->garray;
2466   PetscInt *diagIdx, *offdiagIdx;
2467   Vec diagV, offdiagV;
2468   PetscScalar *a, *diagA, *offdiagA;
2469   PetscInt r;
2470   PetscErrorCode ierr;
2471
2472   PetscFunctionBegin;
2473   ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr);
2474   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr);
2475   ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr);
2476   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2477   ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr);
2478   ierr = VecGetArray(v, &a);CHKERRQ(ierr);
2479   ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr);
2480   ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2481   for (r = 0; r < n; ++r) {
2482     if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) {
2483       a[r]   = diagA[r];
2484       idx[r] = cstart + diagIdx[r];
2485     } else {
2486       a[r]   = offdiagA[r];
2487       idx[r] = cmap[offdiagIdx[r]];
2488     }
2489   }
2490   ierr = VecRestoreArray(v, &a);CHKERRQ(ierr);
2491   ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr);
2492   ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr);
2493   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2494   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2495   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498
2499 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2500 {
2501   PetscErrorCode ierr;
2502   Mat *dummy;
2503
2504   PetscFunctionBegin;
2505   ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2506   *newmat = *dummy;
2507   ierr = PetscFree(dummy);CHKERRQ(ierr);
2508   PetscFunctionReturn(0);
2509 }
2510
2511 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2512 {
2513   Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data;
2514   PetscErrorCode ierr;
2515
2516   PetscFunctionBegin;
2517   ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2518 A->factorerrortype = a->A->factorerrortype; 2519 PetscFunctionReturn(0); 2520 } 2521 2522 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2523 { 2524 PetscErrorCode ierr; 2525 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2526 2527 PetscFunctionBegin; 2528 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2529 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2530 if (x->assembled) { 2531 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2532 } else { 2533 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2534 } 2535 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2536 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2537 PetscFunctionReturn(0); 2538 } 2539 2540 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2541 { 2542 PetscFunctionBegin; 2543 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2544 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2545 PetscFunctionReturn(0); 2546 } 2547 2548 /*@ 2549 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2550 2551 Collective on Mat 2552 2553 Input Parameters: 2554 + A - the matrix 2555 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2556 2557 Level: advanced 2558 2559 @*/ 2560 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2561 { 2562 PetscErrorCode ierr; 2563 2564 PetscFunctionBegin; 2565 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2566 PetscFunctionReturn(0); 2567 } 2568 2569 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2570 { 2571 PetscErrorCode ierr; 2572 PetscBool sc = PETSC_FALSE,flg; 2573 2574 PetscFunctionBegin; 2575 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2576 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2577 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2578 if (flg) { 2579 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2580 } 2581 ierr = PetscOptionsTail();CHKERRQ(ierr); 2582 PetscFunctionReturn(0); 2583 } 2584 2585 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2586 { 2587 PetscErrorCode ierr; 2588 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2589 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2590 2591 PetscFunctionBegin; 2592 if (!Y->preallocated) { 2593 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2594 } else if (!aij->nz) { 2595 PetscInt nonew = aij->nonew; 2596 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2597 aij->nonew = nonew; 2598 } 2599 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2600 PetscFunctionReturn(0); 2601 } 2602 2603 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2604 { 2605 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2606 PetscErrorCode ierr; 2607 2608 PetscFunctionBegin; 2609 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2610 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2611 if (d) { 2612 PetscInt rstart; 
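    /*
       The sequential MatMissingDiagonal() above reports the first missing diagonal entry in the local
       row numbering of the diagonal block a->A, so it is converted to a global row index below.  For
       example, a process owning global rows [100,200) with the diagonal of its local row 5 missing
       reports d = 5, which becomes d = 105 after the shift by rstart.
    */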
2613 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2614 *d += rstart; 2615 2616 } 2617 PetscFunctionReturn(0); 2618 } 2619 2620 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2621 { 2622 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2623 PetscErrorCode ierr; 2624 2625 PetscFunctionBegin; 2626 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 /* -------------------------------------------------------------------*/ 2631 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2632 MatGetRow_MPIAIJ, 2633 MatRestoreRow_MPIAIJ, 2634 MatMult_MPIAIJ, 2635 /* 4*/ MatMultAdd_MPIAIJ, 2636 MatMultTranspose_MPIAIJ, 2637 MatMultTransposeAdd_MPIAIJ, 2638 0, 2639 0, 2640 0, 2641 /*10*/ 0, 2642 0, 2643 0, 2644 MatSOR_MPIAIJ, 2645 MatTranspose_MPIAIJ, 2646 /*15*/ MatGetInfo_MPIAIJ, 2647 MatEqual_MPIAIJ, 2648 MatGetDiagonal_MPIAIJ, 2649 MatDiagonalScale_MPIAIJ, 2650 MatNorm_MPIAIJ, 2651 /*20*/ MatAssemblyBegin_MPIAIJ, 2652 MatAssemblyEnd_MPIAIJ, 2653 MatSetOption_MPIAIJ, 2654 MatZeroEntries_MPIAIJ, 2655 /*24*/ MatZeroRows_MPIAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /*29*/ MatSetUp_MPIAIJ, 2661 0, 2662 0, 2663 MatGetDiagonalBlock_MPIAIJ, 2664 0, 2665 /*34*/ MatDuplicate_MPIAIJ, 2666 0, 2667 0, 2668 0, 2669 0, 2670 /*39*/ MatAXPY_MPIAIJ, 2671 MatCreateSubMatrices_MPIAIJ, 2672 MatIncreaseOverlap_MPIAIJ, 2673 MatGetValues_MPIAIJ, 2674 MatCopy_MPIAIJ, 2675 /*44*/ MatGetRowMax_MPIAIJ, 2676 MatScale_MPIAIJ, 2677 MatShift_MPIAIJ, 2678 MatDiagonalSet_MPIAIJ, 2679 MatZeroRowsColumns_MPIAIJ, 2680 /*49*/ MatSetRandom_MPIAIJ, 2681 0, 2682 0, 2683 0, 2684 0, 2685 /*54*/ MatFDColoringCreate_MPIXAIJ, 2686 0, 2687 MatSetUnfactored_MPIAIJ, 2688 MatPermute_MPIAIJ, 2689 0, 2690 /*59*/ MatCreateSubMatrix_MPIAIJ, 2691 MatDestroy_MPIAIJ, 2692 MatView_MPIAIJ, 2693 0, 2694 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2695 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2696 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2697 0, 2698 0, 2699 0, 2700 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2701 MatGetRowMinAbs_MPIAIJ, 2702 0, 2703 0, 2704 0, 2705 0, 2706 /*75*/ MatFDColoringApply_AIJ, 2707 MatSetFromOptions_MPIAIJ, 2708 0, 2709 0, 2710 MatFindZeroDiagonals_MPIAIJ, 2711 /*80*/ 0, 2712 0, 2713 0, 2714 /*83*/ MatLoad_MPIAIJ, 2715 MatIsSymmetric_MPIAIJ, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2721 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2722 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2723 MatPtAP_MPIAIJ_MPIAIJ, 2724 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2725 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2726 0, 2727 0, 2728 0, 2729 MatPinToCPU_MPIAIJ, 2730 /*99*/ 0, 2731 0, 2732 0, 2733 MatConjugate_MPIAIJ, 2734 0, 2735 /*104*/MatSetValuesRow_MPIAIJ, 2736 MatRealPart_MPIAIJ, 2737 MatImaginaryPart_MPIAIJ, 2738 0, 2739 0, 2740 /*109*/0, 2741 0, 2742 MatGetRowMin_MPIAIJ, 2743 0, 2744 MatMissingDiagonal_MPIAIJ, 2745 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2746 0, 2747 MatGetGhosts_MPIAIJ, 2748 0, 2749 0, 2750 /*119*/0, 2751 0, 2752 0, 2753 0, 2754 MatGetMultiProcBlock_MPIAIJ, 2755 /*124*/MatFindNonzeroRows_MPIAIJ, 2756 MatGetColumnNorms_MPIAIJ, 2757 MatInvertBlockDiagonal_MPIAIJ, 2758 MatInvertVariableBlockDiagonal_MPIAIJ, 2759 MatCreateSubMatricesMPI_MPIAIJ, 2760 /*129*/0, 2761 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2762 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2763 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2764 0, 2765 /*134*/0, 2766 0, 2767 MatRARt_MPIAIJ_MPIAIJ, 2768 0, 2769 0, 2770 
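                                       /*
                                          Each slot of this table corresponds to an entry of the MatOperation
                                          enum; a 0 means no MATMPIAIJ-specific implementation is provided and
                                          the generic Mat interface either uses a default or errors.  Dispatch
                                          goes through the table, roughly
                                            ierr = (*A->ops->getrowmin)(A,v,idx);CHKERRQ(ierr);
                                          inside MatGetRowMin(), which for this type resolves to the
                                          MatGetRowMin_MPIAIJ entry listed above.
                                       */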
/*139*/MatSetBlockSizes_MPIAIJ, 2771 0, 2772 0, 2773 MatFDColoringSetUp_MPIXAIJ, 2774 MatFindOffBlockDiagonalEntries_MPIAIJ, 2775 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2776 }; 2777 2778 /* ----------------------------------------------------------------------------------------*/ 2779 2780 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2781 { 2782 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2783 PetscErrorCode ierr; 2784 2785 PetscFunctionBegin; 2786 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2787 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2788 PetscFunctionReturn(0); 2789 } 2790 2791 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2792 { 2793 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2798 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2799 PetscFunctionReturn(0); 2800 } 2801 2802 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2803 { 2804 Mat_MPIAIJ *b; 2805 PetscErrorCode ierr; 2806 PetscMPIInt size; 2807 2808 PetscFunctionBegin; 2809 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2810 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2811 b = (Mat_MPIAIJ*)B->data; 2812 2813 #if defined(PETSC_USE_CTABLE) 2814 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2815 #else 2816 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2817 #endif 2818 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2819 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2820 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2821 2822 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2823 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2824 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2825 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2826 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2827 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2828 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2829 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2830 2831 if (!B->preallocated) { 2832 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2833 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2834 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2835 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2836 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2837 } 2838 2839 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2840 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2841 B->preallocated = PETSC_TRUE; 2842 B->was_assembled = PETSC_FALSE; 2843 B->assembled = PETSC_FALSE; 2844 PetscFunctionReturn(0); 2845 } 2846 2847 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2848 { 2849 Mat_MPIAIJ *b; 2850 PetscErrorCode ierr; 2851 2852 PetscFunctionBegin; 2853 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2854 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2855 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2856 b = (Mat_MPIAIJ*)B->data; 2857 2858 #if defined(PETSC_USE_CTABLE) 2859 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2860 #else 2861 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2862 #endif 2863 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2864 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2865 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2866 2867 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2868 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2869 B->preallocated = PETSC_TRUE; 2870 B->was_assembled = PETSC_FALSE; 2871 B->assembled = PETSC_FALSE; 2872 PetscFunctionReturn(0); 2873 } 2874 2875 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2876 { 2877 Mat mat; 2878 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2879 PetscErrorCode ierr; 2880 2881 PetscFunctionBegin; 2882 *newmat = 0; 2883 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2884 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2885 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2886 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2887 a = (Mat_MPIAIJ*)mat->data; 2888 2889 mat->factortype = matin->factortype; 2890 mat->assembled = PETSC_TRUE; 2891 mat->insertmode = NOT_SET_VALUES; 2892 mat->preallocated = PETSC_TRUE; 2893 2894 a->size = oldmat->size; 2895 a->rank = oldmat->rank; 2896 a->donotstash = oldmat->donotstash; 2897 a->roworiented = oldmat->roworiented; 2898 a->rowindices = 0; 2899 a->rowvalues = 0; 2900 a->getrowactive = PETSC_FALSE; 2901 2902 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2903 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2904 2905 if (oldmat->colmap) { 2906 #if defined(PETSC_USE_CTABLE) 2907 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2908 #else 2909 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2910 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2911 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2912 #endif 2913 } else a->colmap = 0; 2914 if (oldmat->garray) { 2915 PetscInt len; 2916 len = oldmat->B->cmap->n; 2917 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2918 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2919 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2920 } else a->garray = 0; 2921 2922 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2923 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2924 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2925 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2926 2927 if (oldmat->Mvctx_mpi1) { 2928 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2929 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2930 } 2931 2932 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2933 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2934 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2935 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2936 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2937 *newmat = mat; 2938 PetscFunctionReturn(0); 2939 } 2940 2941 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2942 { 2943 PetscBool isbinary, ishdf5; 2944 PetscErrorCode ierr; 2945 2946 PetscFunctionBegin; 2947 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2948 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2949 /* force binary viewer to load .info file if it has not yet done so */ 2950 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2951 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2952 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2953 if (isbinary) { 2954 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2955 } else if (ishdf5) { 2956 #if defined(PETSC_HAVE_HDF5) 2957 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2958 #else 2959 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2960 #endif 2961 } else { 2962 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2963 } 2964 PetscFunctionReturn(0); 2965 } 2966 2967 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2968 { 2969 PetscScalar *vals,*svals; 2970 MPI_Comm comm; 2971 PetscErrorCode ierr; 2972 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2973 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2974 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2975 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2976 PetscInt cend,cstart,n,*rowners; 2977 int fd; 2978 PetscInt bs = newMat->rmap->bs; 2979 2980 PetscFunctionBegin; 2981 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2982 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2983 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2984 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2985 if (!rank) { 2986 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2987 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2988 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 
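    /*
       The file parsed here uses the layout written by MatView_MPIAIJ_Binary() earlier in this file:
         header[0] = MAT_FILE_CLASSID, header[1] = M (rows), header[2] = N (columns), header[3] = total nonzeros,
       followed by all row lengths, then all column indices, then all numerical values.  A rough usage
       sketch (hypothetical names A, B and file "A.dat"):
         PetscViewer vw;
         ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_WRITE,&vw);CHKERRQ(ierr);
         ierr = MatView(A,vw);CHKERRQ(ierr);
         ierr = PetscViewerDestroy(&vw);CHKERRQ(ierr);
         ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"A.dat",FILE_MODE_READ,&vw);CHKERRQ(ierr);
         ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
         ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
         ierr = MatLoad(B,vw);CHKERRQ(ierr);
         ierr = PetscViewerDestroy(&vw);CHKERRQ(ierr);
    */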
2989 } 2990 2991 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2992 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2993 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2994 if (bs < 0) bs = 1; 2995 2996 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2997 M = header[1]; N = header[2]; 2998 2999 /* If global sizes are set, check if they are consistent with that given in the file */ 3000 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 3001 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 3002 3003 /* determine ownership of all (block) rows */ 3004 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3005 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3006 else m = newMat->rmap->n; /* Set by user */ 3007 3008 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3009 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3010 3011 /* First process needs enough room for process with most rows */ 3012 if (!rank) { 3013 mmax = rowners[1]; 3014 for (i=2; i<=size; i++) { 3015 mmax = PetscMax(mmax, rowners[i]); 3016 } 3017 } else mmax = -1; /* unused, but compilers complain */ 3018 3019 rowners[0] = 0; 3020 for (i=2; i<=size; i++) { 3021 rowners[i] += rowners[i-1]; 3022 } 3023 rstart = rowners[rank]; 3024 rend = rowners[rank+1]; 3025 3026 /* distribute row lengths to all processors */ 3027 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3028 if (!rank) { 3029 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3030 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3031 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3032 for (j=0; j<m; j++) { 3033 procsnz[0] += ourlens[j]; 3034 } 3035 for (i=1; i<size; i++) { 3036 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3037 /* calculate the number of nonzeros on each processor */ 3038 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3039 procsnz[i] += rowlengths[j]; 3040 } 3041 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3042 } 3043 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3044 } else { 3045 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3046 } 3047 3048 if (!rank) { 3049 /* determine max buffer needed and allocate it */ 3050 maxnz = 0; 3051 for (i=0; i<size; i++) { 3052 maxnz = PetscMax(maxnz,procsnz[i]); 3053 } 3054 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3055 3056 /* read in my part of the matrix column indices */ 3057 nz = procsnz[0]; 3058 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3059 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3060 3061 /* read in every one elses and ship off */ 3062 for (i=1; i<size; i++) { 3063 nz = procsnz[i]; 3064 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3065 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3066 } 3067 ierr = PetscFree(cols);CHKERRQ(ierr); 3068 } else { 3069 /* determine buffer space needed for message */ 3070 nz = 0; 3071 for (i=0; i<m; i++) { 3072 
nz += ourlens[i]; 3073 } 3074 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3075 3076 /* receive message of column indices*/ 3077 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3078 } 3079 3080 /* determine column ownership if matrix is not square */ 3081 if (N != M) { 3082 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3083 else n = newMat->cmap->n; 3084 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3085 cstart = cend - n; 3086 } else { 3087 cstart = rstart; 3088 cend = rend; 3089 n = cend - cstart; 3090 } 3091 3092 /* loop over local rows, determining number of off diagonal entries */ 3093 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3094 jj = 0; 3095 for (i=0; i<m; i++) { 3096 for (j=0; j<ourlens[i]; j++) { 3097 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3098 jj++; 3099 } 3100 } 3101 3102 for (i=0; i<m; i++) { 3103 ourlens[i] -= offlens[i]; 3104 } 3105 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3106 3107 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3108 3109 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3110 3111 for (i=0; i<m; i++) { 3112 ourlens[i] += offlens[i]; 3113 } 3114 3115 if (!rank) { 3116 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3117 3118 /* read in my part of the matrix numerical values */ 3119 nz = procsnz[0]; 3120 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3121 3122 /* insert into matrix */ 3123 jj = rstart; 3124 smycols = mycols; 3125 svals = vals; 3126 for (i=0; i<m; i++) { 3127 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3128 smycols += ourlens[i]; 3129 svals += ourlens[i]; 3130 jj++; 3131 } 3132 3133 /* read in other processors and ship out */ 3134 for (i=1; i<size; i++) { 3135 nz = procsnz[i]; 3136 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3137 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3138 } 3139 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3140 } else { 3141 /* receive numeric values */ 3142 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3143 3144 /* receive message of values*/ 3145 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3146 3147 /* insert into matrix */ 3148 jj = rstart; 3149 smycols = mycols; 3150 svals = vals; 3151 for (i=0; i<m; i++) { 3152 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3153 smycols += ourlens[i]; 3154 svals += ourlens[i]; 3155 jj++; 3156 } 3157 } 3158 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3159 ierr = PetscFree(vals);CHKERRQ(ierr); 3160 ierr = PetscFree(mycols);CHKERRQ(ierr); 3161 ierr = PetscFree(rowners);CHKERRQ(ierr); 3162 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3163 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3164 PetscFunctionReturn(0); 3165 } 3166 3167 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3168 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3169 { 3170 PetscErrorCode ierr; 3171 IS iscol_local; 3172 PetscBool isstride; 3173 PetscMPIInt lisstride=0,gisstride; 3174 3175 PetscFunctionBegin; 3176 /* check if we are grabbing all columns*/ 3177 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3178 3179 if (isstride) { 3180 PetscInt start,len,mstart,mlen; 3181 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3182 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3183 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3184 if (mstart == start && mlen-mstart == len) lisstride = 1; 3185 } 3186 3187 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3188 if (gisstride) { 3189 PetscInt N; 3190 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3191 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr); 3192 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3193 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3194 } else { 3195 PetscInt cbs; 3196 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3197 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3198 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3199 } 3200 3201 *isseq = iscol_local; 3202 PetscFunctionReturn(0); 3203 } 3204 3205 /* 3206 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3207 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3208 3209 Input Parameters: 3210 mat - matrix 3211 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3212 i.e., mat->rstart <= isrow[i] < mat->rend 3213 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3214 i.e., mat->cstart <= iscol[i] < mat->cend 3215 Output Parameter: 3216 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3217 iscol_o - sequential column index set for retrieving mat->B 3218 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3219 */ 3220 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3221 { 3222 PetscErrorCode ierr; 3223 Vec x,cmap; 3224 const PetscInt *is_idx; 3225 PetscScalar *xarray,*cmaparray; 3226 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3227 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3228 Mat B=a->B; 3229 Vec lvec=a->lvec,lcmap; 3230 PetscInt i,cstart,cend,Bn=B->cmap->N; 3231 MPI_Comm comm; 3232 VecScatter Mvctx=a->Mvctx; 3233 3234 PetscFunctionBegin; 3235 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3236 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3237 3238 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3239 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3240 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3241 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3242 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3243 3244 /* Get start indices */ 3245 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3246 isstart -= ncols; 3247 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3248 3249 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3250 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3251 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3252 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3253 for (i=0; i<ncols; i++) { 3254 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3255 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3256 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3257 } 3258 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3259 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3260 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3261 3262 /* Get iscol_d */ 3263 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3264 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3265 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3266 3267 /* Get isrow_d */ 3268 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3269 rstart = mat->rmap->rstart; 3270 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3271 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3272 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3273 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3274 3275 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3276 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3277 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3278 3279 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3280 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3281 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3282 3283 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3284 3285 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3286 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3287 3288 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3289 /* off-process column indices */ 3290 count = 0; 3291 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3292 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3293 3294 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3295 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3296 for (i=0; i<Bn; i++) { 3297 if (PetscRealPart(xarray[i]) > -1.0) { 3298 idx[count] = i; /* local column index in off-diagonal part B */ 3299 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3300 count++; 3301 } 3302 } 3303 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3304 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3305 3306 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3307 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3308 3309 ierr = PetscFree(idx);CHKERRQ(ierr); 3310 *garray = cmap1; 3311 3312 ierr = VecDestroy(&x);CHKERRQ(ierr); 3313 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3314 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3315 PetscFunctionReturn(0); 3316 } 3317 3318 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3319 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3320 { 3321 PetscErrorCode ierr; 3322 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3323 Mat M = NULL; 3324 MPI_Comm comm; 3325 IS iscol_d,isrow_d,iscol_o; 3326 Mat Asub = NULL,Bsub = NULL; 3327 PetscInt n; 3328 3329 PetscFunctionBegin; 3330 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3331 3332 if (call == MAT_REUSE_MATRIX) { 3333 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3334 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3335 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3336 3337 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3338 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3339 3340 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3341 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3342 3343 /* Update diagonal and off-diagonal portions of submat */ 3344 asub = (Mat_MPIAIJ*)(*submat)->data; 3345 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3346 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3347 if (n) { 3348 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3349 } 3350 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3351 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3352 3353 } else { /* call == MAT_INITIAL_MATRIX) */ 3354 const PetscInt *garray; 3355 PetscInt BsubN; 3356 3357 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3358 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3359 3360 /* Create local submatrices Asub and Bsub */ 3361 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3362 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3363 3364 /* Create submatrix M */ 3365 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3366 3367 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3368 asub = (Mat_MPIAIJ*)M->data; 3369 3370 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3371 n = asub->B->cmap->N; 3372 if (BsubN > n) { 3373 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3374 const PetscInt *idx; 3375 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3376 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3377 3378 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3379 j = 0; 3380 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3381 for (i=0; i<n; i++) { 3382 if (j >= BsubN) break; 3383 while (subgarray[i] > garray[j]) j++; 3384 3385 if (subgarray[i] == garray[j]) { 3386 idx_new[i] = idx[j++]; 3387 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3388 } 3389 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3390 3391 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3392 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3393 3394 } else if (BsubN < n) { 3395 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3396 } 3397 3398 ierr = PetscFree(garray);CHKERRQ(ierr); 3399 *submat = M; 3400 3401 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3402 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3403 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3404 3405 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3406 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3407 3408 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3409 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3410 } 3411 PetscFunctionReturn(0); 3412 } 3413 3414 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3415 { 3416 PetscErrorCode ierr; 3417 IS iscol_local=NULL,isrow_d; 3418 PetscInt csize; 3419 PetscInt n,i,j,start,end; 3420 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3421 MPI_Comm comm; 3422 3423 PetscFunctionBegin; 3424 /* If isrow has same processor distribution as mat, 3425 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3426 if (call == MAT_REUSE_MATRIX) { 3427 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3428 if (isrow_d) { 3429 sameRowDist = PETSC_TRUE; 3430 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3431 } else { 3432 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3433 if (iscol_local) { 3434 sameRowDist = PETSC_TRUE; 3435 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3436 } 3437 } 3438 } else { 3439 /* Check if isrow has same processor distribution as mat */ 3440 sameDist[0] 
= PETSC_FALSE; 3441 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3442 if (!n) { 3443 sameDist[0] = PETSC_TRUE; 3444 } else { 3445 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3446 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3447 if (i >= start && j < end) { 3448 sameDist[0] = PETSC_TRUE; 3449 } 3450 } 3451 3452 /* Check if iscol has same processor distribution as mat */ 3453 sameDist[1] = PETSC_FALSE; 3454 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3455 if (!n) { 3456 sameDist[1] = PETSC_TRUE; 3457 } else { 3458 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3459 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3460 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3461 } 3462 3463 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3464 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3465 sameRowDist = tsameDist[0]; 3466 } 3467 3468 if (sameRowDist) { 3469 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3470 /* isrow and iscol have same processor distribution as mat */ 3471 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3472 PetscFunctionReturn(0); 3473 } else { /* sameRowDist */ 3474 /* isrow has same processor distribution as mat */ 3475 if (call == MAT_INITIAL_MATRIX) { 3476 PetscBool sorted; 3477 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3478 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3479 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3480 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3481 3482 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3483 if (sorted) { 3484 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3485 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 } else { /* call == MAT_REUSE_MATRIX */ 3489 IS iscol_sub; 3490 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3491 if (iscol_sub) { 3492 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3493 PetscFunctionReturn(0); 3494 } 3495 } 3496 } 3497 } 3498 3499 /* General case: iscol -> iscol_local which has global size of iscol */ 3500 if (call == MAT_REUSE_MATRIX) { 3501 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3502 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3503 } else { 3504 if (!iscol_local) { 3505 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3506 } 3507 } 3508 3509 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3510 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3511 3512 if (call == MAT_INITIAL_MATRIX) { 3513 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3514 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3515 } 3516 PetscFunctionReturn(0); 3517 } 3518 3519 /*@C 3520 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3521 and "off-diagonal" part of the matrix in CSR format. 3522 3523 Collective 3524 3525 Input Parameters: 3526 + comm - MPI communicator 3527 . 
A - "diagonal" portion of matrix 3528 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3529 - garray - global index of B columns 3530 3531 Output Parameter: 3532 . mat - the matrix, with input A as its local diagonal matrix 3533 Level: advanced 3534 3535 Notes: 3536 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3537 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3538 3539 .seealso: MatCreateMPIAIJWithSplitArrays() 3540 @*/ 3541 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3542 { 3543 PetscErrorCode ierr; 3544 Mat_MPIAIJ *maij; 3545 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3546 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3547 PetscScalar *oa=b->a; 3548 Mat Bnew; 3549 PetscInt m,n,N; 3550 3551 PetscFunctionBegin; 3552 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3553 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3554 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3555 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3556 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3557 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3558 3559 /* Get global columns of mat */ 3560 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3561 3562 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3563 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3564 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3565 maij = (Mat_MPIAIJ*)(*mat)->data; 3566 3567 (*mat)->preallocated = PETSC_TRUE; 3568 3569 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3570 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3571 3572 /* Set A as diagonal portion of *mat */ 3573 maij->A = A; 3574 3575 nz = oi[m]; 3576 for (i=0; i<nz; i++) { 3577 col = oj[i]; 3578 oj[i] = garray[col]; 3579 } 3580 3581 /* Set Bnew as off-diagonal portion of *mat */ 3582 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3583 bnew = (Mat_SeqAIJ*)Bnew->data; 3584 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3585 maij->B = Bnew; 3586 3587 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3588 3589 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3590 b->free_a = PETSC_FALSE; 3591 b->free_ij = PETSC_FALSE; 3592 ierr = MatDestroy(&B);CHKERRQ(ierr); 3593 3594 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3595 bnew->free_a = PETSC_TRUE; 3596 bnew->free_ij = PETSC_TRUE; 3597 3598 /* condense columns of maij->B */ 3599 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3600 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3601 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3602 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3603 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3604 PetscFunctionReturn(0); 3605 } 3606 3607 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3608 
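/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() defined above (illustrative only --
   the caller is assumed to have already built the sequential diagonal block Aloc, the
   off-diagonal block Bloc and its global column map garray, e.g. as done in
   MatCreateSubMatrix_MPIAIJ_SameRowColDist() above, and comm is the parallel communicator):

      Mat            Aloc,Bloc,C;
      const PetscInt *garray;
      ...
      ierr = MatCreateMPIAIJWithSeqAIJ(comm,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
      ...
      Aloc and Bloc now belong to C and must not be used or destroyed by the caller.
*/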
3609 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3610 { 3611 PetscErrorCode ierr; 3612 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3613 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3614 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3615 Mat M,Msub,B=a->B; 3616 MatScalar *aa; 3617 Mat_SeqAIJ *aij; 3618 PetscInt *garray = a->garray,*colsub,Ncols; 3619 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3620 IS iscol_sub,iscmap; 3621 const PetscInt *is_idx,*cmap; 3622 PetscBool allcolumns=PETSC_FALSE; 3623 MPI_Comm comm; 3624 3625 PetscFunctionBegin; 3626 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3627 3628 if (call == MAT_REUSE_MATRIX) { 3629 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3630 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3631 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3632 3633 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3634 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3635 3636 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3637 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3638 3639 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3640 3641 } else { /* call == MAT_INITIAL_MATRIX) */ 3642 PetscBool flg; 3643 3644 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3645 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3646 3647 /* (1) iscol -> nonscalable iscol_local */ 3648 /* Check for special case: each processor gets entire matrix columns */ 3649 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3650 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3651 if (allcolumns) { 3652 iscol_sub = iscol_local; 3653 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3654 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3655 3656 } else { 3657 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3658 PetscInt *idx,*cmap1,k; 3659 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3660 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3661 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3662 count = 0; 3663 k = 0; 3664 for (i=0; i<Ncols; i++) { 3665 j = is_idx[i]; 3666 if (j >= cstart && j < cend) { 3667 /* diagonal part of mat */ 3668 idx[count] = j; 3669 cmap1[count++] = i; /* column index in submat */ 3670 } else if (Bn) { 3671 /* off-diagonal part of mat */ 3672 if (j == garray[k]) { 3673 idx[count] = j; 3674 cmap1[count++] = i; /* column index in submat */ 3675 } else if (j > garray[k]) { 3676 while (j > garray[k] && k < Bn-1) k++; 3677 if (j == garray[k]) { 3678 idx[count] = j; 3679 cmap1[count++] = i; /* column index in submat */ 3680 } 3681 } 3682 } 3683 } 3684 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3685 3686 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3687 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3688 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3689 3690 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3691 } 3692 3693 /* (3) Create sequential Msub */ 3694 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3695 } 3696 3697 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3698 aij = (Mat_SeqAIJ*)(Msub)->data; 3699 ii = aij->i; 3700 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3701 3702 /* 3703 m - number of local rows 3704 Ncols - number of columns (same on all processors) 3705 rstart - first row in new global matrix generated 3706 */ 3707 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3708 3709 if (call == MAT_INITIAL_MATRIX) { 3710 /* (4) Create parallel newmat */ 3711 PetscMPIInt rank,size; 3712 PetscInt csize; 3713 3714 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3715 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3716 3717 /* 3718 Determine the number of non-zeros in the diagonal and off-diagonal 3719 portions of the matrix in order to do correct preallocation 3720 */ 3721 3722 /* first get start and end of "diagonal" columns */ 3723 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3724 if (csize == PETSC_DECIDE) { 3725 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3726 if (mglobal == Ncols) { /* square matrix */ 3727 nlocal = m; 3728 } else { 3729 nlocal = Ncols/size + ((Ncols % size) > rank); 3730 } 3731 } else { 3732 nlocal = csize; 3733 } 3734 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3735 rstart = rend - nlocal; 3736 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3737 3738 /* next, compute all the lengths */ 3739 jj = aij->j; 3740 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3741 olens = dlens + m; 3742 for (i=0; i<m; i++) { 3743 jend = ii[i+1] - ii[i]; 3744 olen = 0; 3745 dlen = 0; 3746 for (j=0; j<jend; j++) { 3747 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3748 else dlen++; 3749 jj++; 3750 } 3751 olens[i] = olen; 3752 dlens[i] = dlen; 3753 } 3754 3755 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3756 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3757 3758 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3759 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
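    /* the m local rows come from the sequential Msub; the nlocal local columns chosen above
       define the [rstart,rend) ownership range that was used to split dlens/olens into
       diagonal and off-diagonal preallocation counts */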
3760 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3761 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3762 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3763 ierr = PetscFree(dlens);CHKERRQ(ierr); 3764 3765 } else { /* call == MAT_REUSE_MATRIX */ 3766 M = *newmat; 3767 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3768 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3769 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3770 /* 3771 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3772 rather than the slower MatSetValues(). 3773 */ 3774 M->was_assembled = PETSC_TRUE; 3775 M->assembled = PETSC_FALSE; 3776 } 3777 3778 /* (5) Set values of Msub to *newmat */ 3779 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3780 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3781 3782 jj = aij->j; 3783 aa = aij->a; 3784 for (i=0; i<m; i++) { 3785 row = rstart + i; 3786 nz = ii[i+1] - ii[i]; 3787 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3788 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3789 jj += nz; aa += nz; 3790 } 3791 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3792 3793 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3794 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3795 3796 ierr = PetscFree(colsub);CHKERRQ(ierr); 3797 3798 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3799 if (call == MAT_INITIAL_MATRIX) { 3800 *newmat = M; 3801 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3802 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3803 3804 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3805 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3806 3807 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3808 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3809 3810 if (iscol_local) { 3811 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3812 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3813 } 3814 } 3815 PetscFunctionReturn(0); 3816 } 3817 3818 /* 3819 Not great since it makes two copies of the submatrix, first an SeqAIJ 3820 in local and then by concatenating the local matrices the end result. 3821 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3822 3823 Note: This requires a sequential iscol with all indices. 
3824 */ 3825 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3826 { 3827 PetscErrorCode ierr; 3828 PetscMPIInt rank,size; 3829 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3830 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3831 Mat M,Mreuse; 3832 MatScalar *aa,*vwork; 3833 MPI_Comm comm; 3834 Mat_SeqAIJ *aij; 3835 PetscBool colflag,allcolumns=PETSC_FALSE; 3836 3837 PetscFunctionBegin; 3838 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3839 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3840 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3841 3842 /* Check for special case: each processor gets entire matrix columns */ 3843 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3844 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3845 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3846 3847 if (call == MAT_REUSE_MATRIX) { 3848 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3849 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3850 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3851 } else { 3852 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3853 } 3854 3855 /* 3856 m - number of local rows 3857 n - number of columns (same on all processors) 3858 rstart - first row in new global matrix generated 3859 */ 3860 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3861 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3862 if (call == MAT_INITIAL_MATRIX) { 3863 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3864 ii = aij->i; 3865 jj = aij->j; 3866 3867 /* 3868 Determine the number of non-zeros in the diagonal and off-diagonal 3869 portions of the matrix in order to do correct preallocation 3870 */ 3871 3872 /* first get start and end of "diagonal" columns */ 3873 if (csize == PETSC_DECIDE) { 3874 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3875 if (mglobal == n) { /* square matrix */ 3876 nlocal = m; 3877 } else { 3878 nlocal = n/size + ((n % size) > rank); 3879 } 3880 } else { 3881 nlocal = csize; 3882 } 3883 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3884 rstart = rend - nlocal; 3885 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3886 3887 /* next, compute all the lengths */ 3888 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3889 olens = dlens + m; 3890 for (i=0; i<m; i++) { 3891 jend = ii[i+1] - ii[i]; 3892 olen = 0; 3893 dlen = 0; 3894 for (j=0; j<jend; j++) { 3895 if (*jj < rstart || *jj >= rend) olen++; 3896 else dlen++; 3897 jj++; 3898 } 3899 olens[i] = olen; 3900 dlens[i] = dlen; 3901 } 3902 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3903 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3904 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3905 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3906 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3907 ierr = PetscFree(dlens);CHKERRQ(ierr); 3908 } else { 3909 PetscInt ml,nl; 3910 3911 M = *newmat; 3912 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3913 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3914 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3915 /* 3916 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3917 rather than the slower MatSetValues(). 3918 */ 3919 M->was_assembled = PETSC_TRUE; 3920 M->assembled = PETSC_FALSE; 3921 } 3922 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3923 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3924 ii = aij->i; 3925 jj = aij->j; 3926 aa = aij->a; 3927 for (i=0; i<m; i++) { 3928 row = rstart + i; 3929 nz = ii[i+1] - ii[i]; 3930 cwork = jj; jj += nz; 3931 vwork = aa; aa += nz; 3932 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3933 } 3934 3935 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3936 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3937 *newmat = M; 3938 3939 /* save submatrix used in processor for next request */ 3940 if (call == MAT_INITIAL_MATRIX) { 3941 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3942 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3943 } 3944 PetscFunctionReturn(0); 3945 } 3946 3947 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3948 { 3949 PetscInt m,cstart, cend,j,nnz,i,d; 3950 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3951 const PetscInt *JJ; 3952 PetscErrorCode ierr; 3953 PetscBool nooffprocentries; 3954 3955 PetscFunctionBegin; 3956 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3957 3958 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3959 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3960 m = B->rmap->n; 3961 cstart = B->cmap->rstart; 3962 cend = B->cmap->rend; 3963 rstart = B->rmap->rstart; 3964 3965 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3966 3967 #if defined(PETSC_USE_DEBUG) 3968 for (i=0; i<m; i++) { 3969 nnz = Ii[i+1]- Ii[i]; 3970 JJ = J + Ii[i]; 3971 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3972 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3973 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3974 } 3975 #endif 3976 3977 for (i=0; i<m; i++) { 3978 nnz = Ii[i+1]- Ii[i]; 3979 JJ = J + Ii[i]; 3980 nnz_max = PetscMax(nnz_max,nnz); 3981 d = 0; 3982 for (j=0; j<nnz; j++) { 3983 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3984 } 3985 d_nnz[i] = d; 3986 o_nnz[i] = nnz - d; 3987 } 3988 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3989 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3990 3991 for (i=0; i<m; i++) { 3992 ii = i + rstart; 3993 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3994 } 3995 nooffprocentries = B->nooffprocentries; 3996 B->nooffprocentries = PETSC_TRUE; 3997 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3998 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3999 B->nooffprocentries = nooffprocentries; 4000 4001 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 4002 PetscFunctionReturn(0); 4003 } 4004 4005 /*@ 4006 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4007 (the default parallel PETSc format). 
4008 4009 Collective 4010 4011 Input Parameters: 4012 + B - the matrix 4013 . i - the indices into j for the start of each local row (starts with zero) 4014 . j - the column indices for each local row (starts with zero) 4015 - v - optional values in the matrix 4016 4017 Level: developer 4018 4019 Notes: 4020 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4021 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4022 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4023 4024 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4025 4026 The format which is used for the sparse matrix input, is equivalent to a 4027 row-major ordering.. i.e for the following matrix, the input data expected is 4028 as shown 4029 4030 $ 1 0 0 4031 $ 2 0 3 P0 4032 $ ------- 4033 $ 4 5 6 P1 4034 $ 4035 $ Process0 [P0]: rows_owned=[0,1] 4036 $ i = {0,1,3} [size = nrow+1 = 2+1] 4037 $ j = {0,0,2} [size = 3] 4038 $ v = {1,2,3} [size = 3] 4039 $ 4040 $ Process1 [P1]: rows_owned=[2] 4041 $ i = {0,3} [size = nrow+1 = 1+1] 4042 $ j = {0,1,2} [size = 3] 4043 $ v = {4,5,6} [size = 3] 4044 4045 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4046 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4047 @*/ 4048 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4049 { 4050 PetscErrorCode ierr; 4051 4052 PetscFunctionBegin; 4053 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4054 PetscFunctionReturn(0); 4055 } 4056 4057 /*@C 4058 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4059 (the default parallel PETSc format). For good matrix assembly performance 4060 the user should preallocate the matrix storage by setting the parameters 4061 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4062 performance can be increased by more than a factor of 50. 4063 4064 Collective 4065 4066 Input Parameters: 4067 + B - the matrix 4068 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4069 (same value is used for all local rows) 4070 . d_nnz - array containing the number of nonzeros in the various rows of the 4071 DIAGONAL portion of the local submatrix (possibly different for each row) 4072 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4073 The size of this array is equal to the number of local rows, i.e 'm'. 4074 For matrices that will be factored, you must leave room for (and set) 4075 the diagonal entry even if it is zero. 4076 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4077 submatrix (same value is used for all local rows). 4078 - o_nnz - array containing the number of nonzeros in the various rows of the 4079 OFF-DIAGONAL portion of the local submatrix (possibly different for 4080 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4081 structure. The size of this array is equal to the number 4082 of local rows, i.e 'm'. 
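
   A typical call sequence (a sketch only; the sizes m,n,M,N and the arrays d_nnz,o_nnz are
   placeholders supplied by the caller) is
.vb
   MatCreate(comm,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A,m,n,M,N);
   MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);
.ve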
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to.  This is an mxn matrix.  In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square.  The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors.  Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows.  This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Here the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively.  The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices.  For example, proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
   local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc.  This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence the preallocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine.  Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and the i indices are indices into the local j array.

   The format used for the sparse matrix input is equivalent to a row-major ordering;
   i.e., for the following matrix, the expected input data is as shown:

$        1 0 0
$        2 0 3   P0
$       -------
$        4 5 6   P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

   Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows
   in standard CSR format.  Only the numerical values are updated; the other arrays must be
   identical to those passed when the matrix was created.

   Collective

   Input Parameters:
+  mat - the matrix
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
.  J - column indices
-  v - matrix values

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
{
  PetscErrorCode ierr;
  PetscInt       cstart,nnz,i,j;
  PetscInt       *ld;
  PetscBool      nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data;
  PetscScalar    *ad = Ad->a, *ao = Ao->a;
  const PetscInt *Adi = Ad->i;
  PetscInt       ldi,Iii,md;

  PetscFunctionBegin;
  if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change between calls to MatUpdateMPIAIJWithArrays()");
  if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change between calls to MatUpdateMPIAIJWithArrays()");

  cstart = mat->cmap->rstart;
  if (!Aij->ld) {
    /* count number of entries below the block diagonal in each row */
    ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
    Aij->ld = ld;
    for (i=0; i<m; i++) {
      nnz = Ii[i+1] - Ii[i];
      j   = 0;
      while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz first so J[] is never read past the end of the row */
      J    += nnz;
      ld[i] = j;
    }
  } else {
    ld = Aij->ld;
  }

  for (i=0; i<m; i++) {
    nnz = Ii[i+1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i+1] - Adi[i];
    ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
    ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
    ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
    ad += md;
    ao += nnz - md;
  }
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(0);
}

/*@C
   MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given);
       this value should be the same as the local size used in creating the
       y vector for the matrix-vector product y = Ax
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given).  For square matrices n is almost always m.
.
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4363 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4364 .  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
4365            (same value is used for all local rows)
4366 .  d_nnz - array containing the number of nonzeros in the various rows of the
4367            DIAGONAL portion of the local submatrix (possibly different for each row)
4368            or NULL, if d_nz is used to specify the nonzero structure.
4369            The size of this array is equal to the number of local rows, i.e. 'm'.
4370 .  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4371            submatrix (same value is used for all local rows).
4372 -  o_nnz - array containing the number of nonzeros in the various rows of the
4373            OFF-DIAGONAL portion of the local submatrix (possibly different for
4374            each row) or NULL, if o_nz is used to specify the nonzero
4375            structure. The size of this array is equal to the number
4376            of local rows, i.e. 'm'.
4377
4378    Output Parameter:
4379 .  A - the matrix
4380
4381    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4382    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4383    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4384
4385    Notes:
4386    If the *_nnz parameter is given then the *_nz parameter is ignored
4387
4388    The m,n,M,N parameters specify the size of the matrix, and its partitioning across
4389    processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4390    storage requirements for this matrix.
4391
4392    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4393    processor then it must be used on all processors that share the object for
4394    that argument.
4395
4396    The user MUST specify either the local or global matrix dimensions
4397    (possibly both).
4398
4399    The parallel matrix is partitioned across processors such that the
4400    first m0 rows belong to process 0, the next m1 rows belong to
4401    process 1, the next m2 rows belong to process 2, etc., where
4402    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4403    values corresponding to an [m x N] submatrix.
4404
4405    The columns are logically partitioned with the n0 columns belonging
4406    to the 0th partition, the next n1 columns belonging to the next
4407    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4408
4409    The DIAGONAL portion of the local submatrix on any given processor
4410    is the submatrix corresponding to the rows and columns m,n
4411    corresponding to the given processor, i.e. the diagonal matrix on
4412    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4413    etc. The remaining portion of the local submatrix [m x (N-n)]
4414    constitutes the OFF-DIAGONAL portion. The example below better
4415    illustrates this concept.
4416
4417    For a square global matrix we define each processor's diagonal portion
4418    to be its local rows and the corresponding columns (a square submatrix);
4419    each processor's off-diagonal portion encompasses the remainder of the
4420    local matrix (a rectangular submatrix).
4421
4422    If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored.
4423
4424    When calling this routine with a single process communicator, a matrix of
4425    type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
4426    type of communicator, use the construction mechanism
4427 .vb
4428      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4429 .ve
4430
4436    By default, this format uses inodes (identical nodes) when possible.
4437    We search for consecutive rows with the same nonzero structure, thereby
4438    reusing matrix information to achieve increased efficiency.
4439
4440    Options Database Keys:
4441 +  -mat_no_inode  - Do not use inodes
4442 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4443
4444
4445
4446    Example usage:
4447
4448    Consider the following 8x8 matrix with 34 non-zero values that is
4449    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4450    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4451    as follows
4452
4453 .vb
4454             1  2  0  |  0  3  0  |  0  4
4455     Proc0   0  5  6  |  7  0  0  |  8  0
4456             9  0 10  | 11  0  0  | 12  0
4457     -------------------------------------
4458            13  0 14  | 15 16 17  |  0  0
4459     Proc1   0 18  0  | 19 20 21  |  0  0
4460             0  0  0  | 22 23  0  | 24  0
4461     -------------------------------------
4462     Proc2  25 26 27  |  0  0 28  | 29  0
4463            30  0  0  | 31 32 33  |  0 34
4464 .ve
4465
4466    This can be represented as a collection of submatrices as
4467
4468 .vb
4469       A B C
4470       D E F
4471       G H I
4472 .ve
4473
4474    where the submatrices A,B,C are owned by proc0, D,E,F are
4475    owned by proc1, G,H,I are owned by proc2.
4476
4477    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4478    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4479    The 'M','N' parameters are 8,8, and have the same values on all procs.
4480
4481    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4482    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4483    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4484    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4485    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4486    matrix, and [DF] as another SeqAIJ matrix.
4487
4488    When the d_nz, o_nz parameters are specified, d_nz storage elements are
4489    allocated for every row of the local diagonal submatrix, and o_nz
4490    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4491    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4492    the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4493    In this case, the values of d_nz,o_nz are
4494 .vb
4495      proc0 : d_nz = 2, o_nz = 2
4496      proc1 : d_nz = 3, o_nz = 2
4497      proc2 : d_nz = 1, o_nz = 4
4498 .ve
4499    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4500    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4501    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4502    34 values.
4503
4504    When the d_nnz, o_nnz parameters are specified, the storage is specified
4505    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4506    In the above case the values for d_nnz,o_nnz are
4507 .vb
4508      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4509      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4510      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4511 .ve
4512    Here the space allocated is the sum of all the above values, i.e. 34, and
4513    hence the preallocation is perfect.
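   As an illustrative sketch only (not a complete program), the call made on proc0 for the
   example above, using the d_nnz/o_nnz values listed in the previous block, could look like
.vb
     Mat      A;
     PetscInt d_nnz[3] = {2,2,2},o_nnz[3] = {2,2,2};

     ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve
   followed by the usual MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd() calls; the other
   processes make the same call with their own local sizes and nnz arrays (for example proc2
   would pass 2,2 for m,n and the two-entry arrays listed above).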
4514 4515 Level: intermediate 4516 4517 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4518 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4519 @*/ 4520 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4521 { 4522 PetscErrorCode ierr; 4523 PetscMPIInt size; 4524 4525 PetscFunctionBegin; 4526 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4527 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4528 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4529 if (size > 1) { 4530 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4531 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4532 } else { 4533 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4534 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4535 } 4536 PetscFunctionReturn(0); 4537 } 4538 4539 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4540 { 4541 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4542 PetscBool flg; 4543 PetscErrorCode ierr; 4544 4545 PetscFunctionBegin; 4546 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4547 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4548 if (Ad) *Ad = a->A; 4549 if (Ao) *Ao = a->B; 4550 if (colmap) *colmap = a->garray; 4551 PetscFunctionReturn(0); 4552 } 4553 4554 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4555 { 4556 PetscErrorCode ierr; 4557 PetscInt m,N,i,rstart,nnz,Ii; 4558 PetscInt *indx; 4559 PetscScalar *values; 4560 4561 PetscFunctionBegin; 4562 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4563 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4564 PetscInt *dnz,*onz,sum,bs,cbs; 4565 4566 if (n == PETSC_DECIDE) { 4567 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4568 } 4569 /* Check sum(n) = N */ 4570 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4571 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4572 4573 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4574 rstart -= m; 4575 4576 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4577 for (i=0; i<m; i++) { 4578 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4579 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4580 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4581 } 4582 4583 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4584 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4585 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4586 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4587 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4588 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4589 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4590 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4591 } 4592 4593 /* numeric phase */ 4594 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4595 for (i=0; i<m; i++) { 4596 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4597 Ii = i + rstart; 4598 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4599 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4600 } 4601 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4602 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4603 PetscFunctionReturn(0); 4604 } 4605 4606 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4607 { 4608 PetscErrorCode ierr; 4609 PetscMPIInt rank; 4610 PetscInt m,N,i,rstart,nnz; 4611 size_t len; 4612 const PetscInt *indx; 4613 PetscViewer out; 4614 char *name; 4615 Mat B; 4616 const PetscScalar *values; 4617 4618 PetscFunctionBegin; 4619 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4620 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4621 /* Should this be the type of the diagonal block of A? */ 4622 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4623 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4624 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4625 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4626 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4627 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4628 for (i=0; i<m; i++) { 4629 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4630 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4631 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4632 } 4633 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4634 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4635 4636 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4637 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4638 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4639 sprintf(name,"%s.%d",outfile,rank); 4640 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4641 ierr = PetscFree(name);CHKERRQ(ierr); 4642 ierr = MatView(B,out);CHKERRQ(ierr); 4643 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4644 ierr = MatDestroy(&B);CHKERRQ(ierr); 4645 PetscFunctionReturn(0); 4646 } 4647 4648 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4649 { 4650 PetscErrorCode ierr; 4651 Mat_Merge_SeqsToMPI *merge; 4652 PetscContainer container; 4653 4654 PetscFunctionBegin; 4655 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4656 if (container) { 4657 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4658 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4665 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4666 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4667 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4668 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4669 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4670 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4671 ierr = PetscFree(merge);CHKERRQ(ierr); 4672 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4673 } 4674 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4675 PetscFunctionReturn(0); 4676 } 4677 4678 #include <../src/mat/utils/freespace.h> 4679 #include <petscbt.h> 4680 4681 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4682 { 4683 PetscErrorCode ierr; 4684 MPI_Comm comm; 4685 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4686 PetscMPIInt 
size,rank,taga,*len_s; 4687 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4688 PetscInt proc,m; 4689 PetscInt **buf_ri,**buf_rj; 4690 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4691 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4692 MPI_Request *s_waits,*r_waits; 4693 MPI_Status *status; 4694 MatScalar *aa=a->a; 4695 MatScalar **abuf_r,*ba_i; 4696 Mat_Merge_SeqsToMPI *merge; 4697 PetscContainer container; 4698 4699 PetscFunctionBegin; 4700 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4701 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4702 4703 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4704 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4705 4706 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4707 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4708 4709 bi = merge->bi; 4710 bj = merge->bj; 4711 buf_ri = merge->buf_ri; 4712 buf_rj = merge->buf_rj; 4713 4714 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4715 owners = merge->rowmap->range; 4716 len_s = merge->len_s; 4717 4718 /* send and recv matrix values */ 4719 /*-----------------------------*/ 4720 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4721 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4722 4723 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4724 for (proc=0,k=0; proc<size; proc++) { 4725 if (!len_s[proc]) continue; 4726 i = owners[proc]; 4727 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4728 k++; 4729 } 4730 4731 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4732 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4733 ierr = PetscFree(status);CHKERRQ(ierr); 4734 4735 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4736 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4737 4738 /* insert mat values of mpimat */ 4739 /*----------------------------*/ 4740 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4741 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4742 4743 for (k=0; k<merge->nrecv; k++) { 4744 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4745 nrows = *(buf_ri_k[k]); 4746 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4747 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4748 } 4749 4750 /* set values of ba */ 4751 m = merge->rowmap->n; 4752 for (i=0; i<m; i++) { 4753 arow = owners[rank] + i; 4754 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4755 bnzi = bi[i+1] - bi[i]; 4756 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4757 4758 /* add local non-zero vals of this proc's seqmat into ba */ 4759 anzi = ai[arow+1] - ai[arow]; 4760 aj = a->j + ai[arow]; 4761 aa = a->a + ai[arow]; 4762 nextaj = 0; 4763 for (j=0; nextaj<anzi; j++) { 4764 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4765 ba_i[j] += aa[nextaj++]; 4766 } 4767 } 4768 4769 /* add received vals into ba */ 4770 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4771 /* i-th row */ 4772 if (i == *nextrow[k]) { 4773 anzi = *(nextai[k]+1) - *nextai[k]; 4774 aj = buf_rj[k] + *(nextai[k]); 4775 aa = abuf_r[k] + *(nextai[k]); 4776 nextaj = 0; 4777 for (j=0; nextaj<anzi; j++) { 4778 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4779 
ba_i[j] += aa[nextaj++]; 4780 } 4781 } 4782 nextrow[k]++; nextai[k]++; 4783 } 4784 } 4785 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4786 } 4787 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4788 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4789 4790 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4791 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4792 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4793 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4794 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4795 PetscFunctionReturn(0); 4796 } 4797 4798 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4799 { 4800 PetscErrorCode ierr; 4801 Mat B_mpi; 4802 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4803 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4804 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4805 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4806 PetscInt len,proc,*dnz,*onz,bs,cbs; 4807 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4808 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4809 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4810 MPI_Status *status; 4811 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4812 PetscBT lnkbt; 4813 Mat_Merge_SeqsToMPI *merge; 4814 PetscContainer container; 4815 4816 PetscFunctionBegin; 4817 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4818 4819 /* make sure it is a PETSc comm */ 4820 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4821 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4822 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4823 4824 ierr = PetscNew(&merge);CHKERRQ(ierr); 4825 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4826 4827 /* determine row ownership */ 4828 /*---------------------------------------------------------*/ 4829 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4830 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4831 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4832 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4833 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4834 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4835 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4836 4837 m = merge->rowmap->n; 4838 owners = merge->rowmap->range; 4839 4840 /* determine the number of messages to send, their lengths */ 4841 /*---------------------------------------------------------*/ 4842 len_s = merge->len_s; 4843 4844 len = 0; /* length of buf_si[] */ 4845 merge->nsend = 0; 4846 for (proc=0; proc<size; proc++) { 4847 len_si[proc] = 0; 4848 if (proc == rank) { 4849 len_s[proc] = 0; 4850 } else { 4851 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4852 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4853 } 4854 if (len_s[proc]) { 4855 merge->nsend++; 4856 nrows = 0; 4857 for (i=owners[proc]; i<owners[proc+1]; i++) { 4858 if (ai[i+1] > ai[i]) nrows++; 4859 } 4860 len_si[proc] = 2*(nrows+1); 4861 len += len_si[proc]; 4862 } 4863 } 4864 4865 /* determine the number and length of messages to receive for ij-structure */ 4866 /*-------------------------------------------------------------------------*/ 4867 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4868 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4869 4870 /* post the Irecv of j-structure */ 4871 /*-------------------------------*/ 4872 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4873 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4874 4875 /* post the Isend of j-structure */ 4876 /*--------------------------------*/ 4877 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4878 4879 for (proc=0, k=0; proc<size; proc++) { 4880 if (!len_s[proc]) continue; 4881 i = owners[proc]; 4882 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4883 k++; 4884 } 4885 4886 /* receives and sends of j-structure are complete */ 4887 /*------------------------------------------------*/ 4888 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4889 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4890 4891 /* send and recv i-structure */ 4892 /*---------------------------*/ 4893 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4894 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4895 4896 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4897 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4898 for (proc=0,k=0; proc<size; proc++) { 4899 if (!len_s[proc]) continue; 4900 /* form outgoing message for i-structure: 4901 buf_si[0]: nrows to be sent 4902 [1:nrows]: row index (global) 4903 [nrows+1:2*nrows+1]: i-structure index 4904 */ 4905 /*-------------------------------------------*/ 4906 nrows = len_si[proc]/2 - 1; 4907 buf_si_i = buf_si + nrows+1; 4908 buf_si[0] = nrows; 4909 buf_si_i[0] = 0; 4910 nrows = 0; 4911 for (i=owners[proc]; i<owners[proc+1]; i++) { 4912 anzi = ai[i+1] - ai[i]; 4913 if (anzi) { 4914 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4915 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4916 nrows++; 4917 } 4918 } 4919 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4920 k++; 4921 buf_si += len_si[proc]; 4922 } 4923 4924 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4925 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4926 4927 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4928 for (i=0; i<merge->nrecv; i++) { 4929 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4930 } 4931 4932 ierr = PetscFree(len_si);CHKERRQ(ierr); 4933 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4934 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4935 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4936 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4937 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4938 ierr = PetscFree(status);CHKERRQ(ierr); 4939 4940 /* compute a local seq matrix in each processor */ 4941 /*----------------------------------------------*/ 4942 /* allocate bi array and free space for accumulating nonzero column info */ 4943 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4944 bi[0] = 0; 4945 4946 /* create and initialize a linked list */ 4947 nlnk = N+1; 4948 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4949 4950 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4951 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4952 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4953 4954 current_space = free_space; 4955 4956 /* determine symbolic info for each local row */ 4957 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4958 4959 for (k=0; k<merge->nrecv; k++) { 4960 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4961 nrows = *buf_ri_k[k]; 4962 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4963 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4964 } 4965 4966 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4967 len = 0; 4968 for (i=0; i<m; i++) { 4969 bnzi = 0; 4970 /* add local non-zero cols of this proc's seqmat into lnk */ 4971 arow = owners[rank] + i; 4972 anzi = ai[arow+1] - ai[arow]; 4973 aj = a->j + ai[arow]; 4974 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4975 bnzi += nlnk; 4976 /* add received col data into lnk */ 4977 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4978 if (i == *nextrow[k]) { /* i-th row */ 4979 anzi = *(nextai[k]+1) - *nextai[k]; 4980 aj = buf_rj[k] + *nextai[k]; 4981 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4982 bnzi += nlnk; 4983 nextrow[k]++; nextai[k]++; 4984 } 4985 } 4986 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4987 4988 /* if free space is not available, make more free space */ 4989 if (current_space->local_remaining<bnzi) { 4990 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4991 nspacedouble++; 4992 } 4993 /* copy data into free space, then initialize lnk */ 4994 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4995 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4996 4997 current_space->array += bnzi; 4998 current_space->local_used += bnzi; 4999 current_space->local_remaining -= bnzi; 5000 5001 bi[i+1] = bi[i] + bnzi; 5002 } 5003 5004 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5005 5006 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5007 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5008 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5009 5010 /* create symbolic parallel matrix B_mpi */ 5011 /*---------------------------------------*/ 5012 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5013 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5014 if (n==PETSC_DECIDE) { 5015 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5016 } else { 5017 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5018 } 5019 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5020 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5021 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5022 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5023 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5024 5025 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5026 B_mpi->assembled = PETSC_FALSE; 5027 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5028 merge->bi = bi; 5029 merge->bj = bj; 5030 merge->buf_ri = buf_ri; 5031 merge->buf_rj = buf_rj; 5032 merge->coi = NULL; 5033 merge->coj = NULL; 5034 merge->owners_co = NULL; 5035 5036 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5037 5038 /* attach the 
supporting struct to B_mpi for reuse */
5039   ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
5040   ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
5041   ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
5042   ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
5043   *mpimat = B_mpi;
5044
5045   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
5046   PetscFunctionReturn(0);
5047 }
5048
5049 /*@C
5050       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
5051                  matrices from each processor
5052
5053     Collective
5054
5055    Input Parameters:
5056 +    comm - the communicator the parallel matrix will live on
5057 .    seqmat - the input sequential matrix on each process
5058 .    m - number of local rows (or PETSC_DECIDE)
5059 .    n - number of local columns (or PETSC_DECIDE)
5060 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5061
5062    Output Parameter:
5063 .    mpimat - the parallel matrix generated
5064
5065     Level: advanced
5066
5067    Notes:
5068      The dimensions of the sequential matrix in each processor MUST be the same.
5069      The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
5070      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
5071 @*/
5072 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
5073 {
5074   PetscErrorCode ierr;
5075   PetscMPIInt    size;
5076
5077   PetscFunctionBegin;
5078   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
5079   if (size == 1) {
5080     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5081     if (scall == MAT_INITIAL_MATRIX) {
5082       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
5083     } else {
5084       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5085     }
5086     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5087     PetscFunctionReturn(0);
5088   }
5089   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5090   if (scall == MAT_INITIAL_MATRIX) {
5091     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5092   }
5093   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5094   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5095   PetscFunctionReturn(0);
5096 }
5097
5098 /*@
5099      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5100           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5101           with MatGetSize()
5102
5103     Not Collective
5104
5105    Input Parameters:
5106 +    A - the matrix
5107 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5108
5109    Output Parameter:
5110 .
A_loc - the local sequential matrix generated 5111 5112 Level: developer 5113 5114 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5115 5116 @*/ 5117 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5118 { 5119 PetscErrorCode ierr; 5120 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5121 Mat_SeqAIJ *mat,*a,*b; 5122 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5123 MatScalar *aa,*ba,*cam; 5124 PetscScalar *ca; 5125 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5126 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5127 PetscBool match; 5128 MPI_Comm comm; 5129 PetscMPIInt size; 5130 5131 PetscFunctionBegin; 5132 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5133 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5134 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5135 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5136 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5137 5138 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5139 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5140 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5141 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5142 aa = a->a; ba = b->a; 5143 if (scall == MAT_INITIAL_MATRIX) { 5144 if (size == 1) { 5145 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5146 PetscFunctionReturn(0); 5147 } 5148 5149 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5150 ci[0] = 0; 5151 for (i=0; i<am; i++) { 5152 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5153 } 5154 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5155 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5156 k = 0; 5157 for (i=0; i<am; i++) { 5158 ncols_o = bi[i+1] - bi[i]; 5159 ncols_d = ai[i+1] - ai[i]; 5160 /* off-diagonal portion of A */ 5161 for (jo=0; jo<ncols_o; jo++) { 5162 col = cmap[*bj]; 5163 if (col >= cstart) break; 5164 cj[k] = col; bj++; 5165 ca[k++] = *ba++; 5166 } 5167 /* diagonal portion of A */ 5168 for (j=0; j<ncols_d; j++) { 5169 cj[k] = cstart + *aj++; 5170 ca[k++] = *aa++; 5171 } 5172 /* off-diagonal portion of A */ 5173 for (j=jo; j<ncols_o; j++) { 5174 cj[k] = cmap[*bj++]; 5175 ca[k++] = *ba++; 5176 } 5177 } 5178 /* put together the new matrix */ 5179 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5180 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5181 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5182 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5183 mat->free_a = PETSC_TRUE; 5184 mat->free_ij = PETSC_TRUE; 5185 mat->nonew = 0; 5186 } else if (scall == MAT_REUSE_MATRIX) { 5187 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5188 ci = mat->i; cj = mat->j; cam = mat->a; 5189 for (i=0; i<am; i++) { 5190 /* off-diagonal portion of A */ 5191 ncols_o = bi[i+1] - bi[i]; 5192 for (jo=0; jo<ncols_o; jo++) { 5193 col = cmap[*bj]; 5194 if (col >= cstart) break; 5195 *cam++ = *ba++; bj++; 5196 } 5197 /* diagonal portion of A */ 5198 ncols_d = ai[i+1] - ai[i]; 5199 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5200 /* off-diagonal portion of A */ 5201 for (j=jo; j<ncols_o; j++) { 5202 *cam++ = *ba++; bj++; 5203 } 5204 } 5205 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5206 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5207 PetscFunctionReturn(0); 5208 } 5209 5210 /*@C 5211 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5212 5213 Not Collective 5214 5215 Input Parameters: 5216 + A - the matrix 5217 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5218 - row, col - index sets of rows and columns to extract (or NULL) 5219 5220 Output Parameter: 5221 . A_loc - the local sequential matrix generated 5222 5223 Level: developer 5224 5225 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5226 5227 @*/ 5228 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5229 { 5230 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5231 PetscErrorCode ierr; 5232 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5233 IS isrowa,iscola; 5234 Mat *aloc; 5235 PetscBool match; 5236 5237 PetscFunctionBegin; 5238 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5239 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5240 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5241 if (!row) { 5242 start = A->rmap->rstart; end = A->rmap->rend; 5243 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5244 } else { 5245 isrowa = *row; 5246 } 5247 if (!col) { 5248 start = A->cmap->rstart; 5249 cmap = a->garray; 5250 nzA = a->A->cmap->n; 5251 nzB = a->B->cmap->n; 5252 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5253 ncols = 0; 5254 for (i=0; i<nzB; i++) { 5255 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5256 else break; 5257 } 5258 imark = i; 5259 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5260 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5261 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5262 } else { 5263 iscola = *col; 5264 } 5265 if (scall != MAT_INITIAL_MATRIX) { 5266 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5267 aloc[0] = *A_loc; 5268 } 5269 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5270 if (!col) { /* attach global id of condensed columns */ 5271 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5272 } 5273 *A_loc = aloc[0]; 5274 ierr = PetscFree(aloc);CHKERRQ(ierr); 5275 if (!row) { 5276 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5277 } 5278 if (!col) { 5279 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5280 } 5281 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 /* 5286 * Destroy a mat that may be 
composed with PetscSF communication objects. 5287 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5288 * */ 5289 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5290 { 5291 PetscSF sf,osf; 5292 IS map; 5293 PetscErrorCode ierr; 5294 5295 PetscFunctionBegin; 5296 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5297 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5298 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5299 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5300 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5301 ierr = ISDestroy(&map);CHKERRQ(ierr); 5302 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5303 PetscFunctionReturn(0); 5304 } 5305 5306 /* 5307 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5308 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5309 * on a global size. 5310 * */ 5311 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5312 { 5313 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5314 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5315 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol; 5316 PetscMPIInt owner; 5317 PetscSFNode *iremote,*oiremote; 5318 const PetscInt *lrowindices; 5319 PetscErrorCode ierr; 5320 PetscSF sf,osf; 5321 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5322 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5323 MPI_Comm comm; 5324 ISLocalToGlobalMapping mapping; 5325 5326 PetscFunctionBegin; 5327 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5328 /* plocalsize is the number of roots 5329 * nrows is the number of leaves 5330 * */ 5331 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5332 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5333 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5334 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5335 for (i=0;i<nrows;i++) { 5336 /* Find a remote index and an owner for a row 5337 * The row could be local or remote 5338 * */ 5339 owner = 0; 5340 lidx = 0; 5341 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5342 iremote[i].index = lidx; 5343 iremote[i].rank = owner; 5344 } 5345 /* Create SF to communicate how many nonzero columns for each row */ 5346 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5347 /* SF will figure out the number of nonzero colunms for each row, and their 5348 * offsets 5349 * */ 5350 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5351 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5352 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5353 5354 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5355 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5356 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5357 roffsets[0] = 0; 5358 roffsets[1] = 0; 5359 for (i=0;i<plocalsize;i++) { 5360 /* diag */ 5361 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5362 /* off diag */ 5363 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5364 /* compute offsets so that we relative location for each row */ 5365 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5366 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5367 } 5368 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5369 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5370 /* 'r' 
means root, and 'l' means leaf */ 5371 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5372 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5373 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5374 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5375 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5376 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5377 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5378 dntotalcols = 0; 5379 ontotalcols = 0; 5380 ncol = 0; 5381 for (i=0;i<nrows;i++) { 5382 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5383 ncol = PetscMax(pnnz[i],ncol); 5384 /* diag */ 5385 dntotalcols += nlcols[i*2+0]; 5386 /* off diag */ 5387 ontotalcols += nlcols[i*2+1]; 5388 } 5389 /* We do not need to figure the right number of columns 5390 * since all the calculations will be done by going through the raw data 5391 * */ 5392 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5393 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5394 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5395 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5396 /* diag */ 5397 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5398 /* off diag */ 5399 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5400 /* diag */ 5401 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5402 /* off diag */ 5403 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5404 dntotalcols = 0; 5405 ontotalcols = 0; 5406 ntotalcols = 0; 5407 for (i=0;i<nrows;i++) { 5408 owner = 0; 5409 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5410 /* Set iremote for diag matrix */ 5411 for (j=0;j<nlcols[i*2+0];j++) { 5412 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5413 iremote[dntotalcols].rank = owner; 5414 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5415 ilocal[dntotalcols++] = ntotalcols++; 5416 } 5417 /* off diag */ 5418 for (j=0;j<nlcols[i*2+1];j++) { 5419 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5420 oiremote[ontotalcols].rank = owner; 5421 oilocal[ontotalcols++] = ntotalcols++; 5422 } 5423 } 5424 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5425 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5426 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5427 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5428 /* P serves as roots and P_oth is leaves 5429 * Diag matrix 5430 * */ 5431 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5432 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5433 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5434 5435 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5436 /* Off diag */ 5437 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5438 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5439 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5440 /* We operate on the matrix internal data for saving memory */ 5441 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5442 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5443 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5444 /* Convert to global indices for diag matrix */ 5445 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5446 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5447 /* We want P_oth store global indices */ 5448 ierr = 
ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5449 /* Use memory scalable approach */ 5450 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5451 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5452 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5453 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5454 /* Convert back to local indices */ 5455 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5456 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5457 nout = 0; 5458 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5459 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5460 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5461 /* Exchange values */ 5462 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5463 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5464 /* Stop PETSc from shrinking memory */ 5465 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5466 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5467 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5468 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5469 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5470 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5471 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5472 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5473 PetscFunctionReturn(0); 5474 } 5475 5476 /* 5477 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5478 * This supports MPIAIJ and MAIJ 5479 * */ 5480 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5481 { 5482 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5483 Mat_SeqAIJ *p_oth; 5484 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5485 IS rows,map; 5486 PetscHMapI hamp; 5487 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5488 MPI_Comm comm; 5489 PetscSF sf,osf; 5490 PetscBool has; 5491 PetscErrorCode ierr; 5492 5493 PetscFunctionBegin; 5494 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5495 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5496 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5497 * and then create a submatrix (that often is an overlapping matrix) 5498 * */ 5499 if (reuse==MAT_INITIAL_MATRIX) { 5500 /* Use a hash table to figure out unique keys */ 5501 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5502 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5503 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5504 count = 0; 5505 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5506 for (i=0;i<a->B->cmap->n;i++) { 5507 key = a->garray[i]/dof; 5508 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5509 if (!has) { 5510 mapping[i] = count; 5511 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5512 } else { 5513 /* Current 'i' has the same value the previous step */ 5514 mapping[i] = count-1; 5515 } 5516 } 5517 ierr = 
ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5518 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5519 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5520 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5521 off = 0; 5522 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5523 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5524 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5525 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5526 /* In case, the matrix was already created but users want to recreate the matrix */ 5527 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5528 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5529 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5530 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5531 } else if (reuse==MAT_REUSE_MATRIX) { 5532 /* If matrix was already created, we simply update values using SF objects 5533 * that as attached to the matrix ealier. 5534 * */ 5535 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5536 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5537 if (!sf || !osf) { 5538 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5539 } 5540 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5541 /* Update values in place */ 5542 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5543 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5544 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5545 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5546 } else { 5547 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5548 } 5549 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5550 PetscFunctionReturn(0); 5551 } 5552 5553 /*@C 5554 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5555 5556 Collective on Mat 5557 5558 Input Parameters: 5559 + A,B - the matrices in mpiaij format 5560 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5561 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5562 5563 Output Parameter: 5564 + rowb, colb - index sets of rows and columns of B to extract 5565 - B_seq - the sequential matrix generated 5566 5567 Level: developer 5568 5569 @*/ 5570 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5571 { 5572 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5573 PetscErrorCode ierr; 5574 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5575 IS isrowb,iscolb; 5576 Mat *bseq=NULL; 5577 5578 PetscFunctionBegin; 5579 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5580 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5581 } 5582 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5583 5584 if (scall == MAT_INITIAL_MATRIX) { 5585 start = A->cmap->rstart; 5586 cmap = a->garray; 5587 nzA = a->A->cmap->n; 5588 nzB = a->B->cmap->n; 5589 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5590 ncols = 0; 5591 for (i=0; i<nzB; i++) { /* row < local row index */ 5592 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5593 else break; 5594 } 5595 imark = i; 5596 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5597 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5598 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5599 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5600 } else { 5601 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5602 isrowb = *rowb; iscolb = *colb; 5603 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5604 bseq[0] = *B_seq; 5605 } 5606 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5607 *B_seq = bseq[0]; 5608 ierr = PetscFree(bseq);CHKERRQ(ierr); 5609 if (!rowb) { 5610 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5611 } else { 5612 *rowb = isrowb; 5613 } 5614 if (!colb) { 5615 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5616 } else { 5617 *colb = iscolb; 5618 } 5619 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5620 PetscFunctionReturn(0); 5621 } 5622 5623 /* 5624 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5625 of the OFF-DIAGONAL portion of local A 5626 5627 Collective on Mat 5628 5629 Input Parameters: 5630 + A,B - the matrices in mpiaij format 5631 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5632 5633 Output Parameter: 5634 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5635 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5636 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5637 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5638 5639 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5640 for this matrix. This is not desirable.. 
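   A minimal calling sketch (illustrative only; the variable names below are placeholders, and error
   handling follows the CHKERRQ convention used throughout this file):

      PetscInt  *startsj_s = NULL,*startsj_r = NULL;
      MatScalar *bufa = NULL;
      Mat       B_oth = NULL;
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
      ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);

   where the second call reuses the startsj_s, startsj_r, bufa buffers and the B_oth matrix created by the
   first call, and is appropriate when only the numerical values of B have changed.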
5641 5642 Level: developer 5643 5644 */ 5645 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5646 { 5647 PetscErrorCode ierr; 5648 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5649 Mat_SeqAIJ *b_oth; 5650 VecScatter ctx; 5651 MPI_Comm comm; 5652 const PetscMPIInt *rprocs,*sprocs; 5653 const PetscInt *srow,*rstarts,*sstarts; 5654 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5655 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5656 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5657 MPI_Request *rwaits = NULL,*swaits = NULL; 5658 MPI_Status rstatus; 5659 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5660 5661 PetscFunctionBegin; 5662 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5663 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5664 5665 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5666 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5667 } 5668 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5669 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5670 5671 if (size == 1) { 5672 startsj_s = NULL; 5673 bufa_ptr = NULL; 5674 *B_oth = NULL; 5675 PetscFunctionReturn(0); 5676 } 5677 5678 ctx = a->Mvctx; 5679 tag = ((PetscObject)ctx)->tag; 5680 5681 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5682 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5683 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5684 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5685 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5686 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5687 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5688 5689 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5690 if (scall == MAT_INITIAL_MATRIX) { 5691 /* i-array */ 5692 /*---------*/ 5693 /* post receives */ 5694 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5695 for (i=0; i<nrecvs; i++) { 5696 rowlen = rvalues + rstarts[i]*rbs; 5697 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5698 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5699 } 5700 5701 /* pack the outgoing message */ 5702 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5703 5704 sstartsj[0] = 0; 5705 rstartsj[0] = 0; 5706 len = 0; /* total length of j or a array to be sent */ 5707 if (nsends) { 5708 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5709 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5710 } 5711 for (i=0; i<nsends; i++) { 5712 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5713 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5714 for (j=0; j<nrows; j++) { 5715 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5716 for (l=0; l<sbs; l++) { 5717 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5718 5719 rowlen[j*sbs+l] = ncols; 5720 5721 len += ncols; 5722 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5723 } 5724 k++; 5725 } 5726 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5727 5728 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5729 } 5730 /* recvs and sends of i-array are completed */ 5731 i = nrecvs; 5732 while (i--) { 5733 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5734 } 5735 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5736 ierr = PetscFree(svalues);CHKERRQ(ierr); 5737 5738 /* allocate buffers for sending j and a arrays */ 5739 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5740 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5741 5742 /* create i-array of B_oth */ 5743 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5744 5745 b_othi[0] = 0; 5746 len = 0; /* total length of j or a array to be received */ 5747 k = 0; 5748 for (i=0; i<nrecvs; i++) { 5749 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5750 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5751 for (j=0; j<nrows; j++) { 5752 b_othi[k+1] = b_othi[k] + rowlen[j]; 5753 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5754 k++; 5755 } 5756 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5757 } 5758 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5759 5760 /* allocate space for j and a arrrays of B_oth */ 5761 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5762 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5763 5764 /* j-array */ 5765 /*---------*/ 5766 /* post receives of j-array */ 5767 for (i=0; i<nrecvs; i++) { 5768 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5769 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5770 } 5771 5772 /* pack the outgoing message j-array */ 5773 if (nsends) k = sstarts[0]; 5774 for (i=0; i<nsends; i++) { 5775 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5776 bufJ = bufj+sstartsj[i]; 5777 for (j=0; j<nrows; j++) { 5778 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5779 for (ll=0; ll<sbs; ll++) { 5780 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5781 for (l=0; l<ncols; l++) { 5782 *bufJ++ = cols[l]; 5783 } 5784 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5785 } 5786 } 5787 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5788 } 5789 5790 /* recvs and sends of j-array are completed */ 5791 i = nrecvs; 5792 while (i--) { 5793 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5794 } 5795 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5796 } else if (scall == MAT_REUSE_MATRIX) { 5797 sstartsj = *startsj_s; 5798 rstartsj = *startsj_r; 5799 bufa = *bufa_ptr; 5800 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5801 b_otha = b_oth->a; 5802 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5803 5804 /* a-array */ 5805 /*---------*/ 5806 /* post receives of a-array */ 5807 for (i=0; i<nrecvs; i++) { 5808 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5809 ierr = 
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufA++ = vals[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }
  /* recvs and sends of a-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    ierr = PetscFree(bufj);CHKERRQ(ierr);
    if (!startsj_s || !bufa_ptr) {
      ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
      ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
    } else {
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  }

  ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
  ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.

   Not Collective

   Input Parameter:
.  A - The matrix in MPIAIJ format

   Output Parameters:
+  lvec - The local vector holding off-process values from the argument to a matrix-vector product
.  colmap - A map from global column index to local index into lvec
-  multScatter - A scatter from the argument of a matrix-vector product to lvec

   Level: developer

@*/
#if defined(PETSC_USE_CTABLE)
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
#else
PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
#endif
{
  Mat_MPIAIJ *a;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
  PetscValidPointer(lvec, 2);
  PetscValidPointer(colmap, 3);
  PetscValidPointer(multScatter, 4);
  a = (Mat_MPIAIJ*) A->data;
  if (lvec) *lvec = a->lvec;
  if (colmap) *colmap = a->colmap;
  if (multScatter) *multScatter = a->Mvctx;
  PetscFunctionReturn(0);
}
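
/*
   Example (editorial sketch, not part of the PETSc manual page above): typical use of
   MatGetCommunicationStructs() on an already assembled MATMPIAIJ matrix A together with a
   vector x that has A's column layout; A, x and the variable names below are assumptions
   made for illustration. lvec, colmap, and the scatter are owned by the matrix and must not
   be destroyed by the caller.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     PetscErrorCode ierr;

     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
     ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     (lvec now holds the off-process entries of x needed by the local part of a matrix-vector product)
*/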

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*);
#endif
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p                          p
        (                  )       (                  )         (                  )
      m (         A        )   *  n(         B        )   =   m (         C        )
        (                  )       (                  )         (                  )

*/
PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
{
  PetscErrorCode ierr;
  Mat            At,Bt,Ct;

  PetscFunctionBegin;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
  ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
  ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr);
  ierr = MatDestroy(&At);CHKERRQ(ierr);
  ierr = MatDestroy(&Bt);CHKERRQ(ierr);
  ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  ierr = MatDestroy(&Ct);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;
  PetscInt       m = A->rmap->n,n = B->cmap->n;
  Mat            Cmat;

  PetscFunctionBegin;
  if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr);
  ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr);
  ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;

  *C = Cmat;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
    ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr);
    ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr);
  }
  ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix.

    MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored.

.seealso: MatCreateAIJ()
M*/
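
/*
   Example (editorial sketch, not taken from the manual page above): the usual way to obtain a
   MATMPIAIJ matrix through the generic creation sequence. The local sizes m and n and the
   preallocation counts are placeholders chosen for illustration.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
     (set entries with MatSetValues(), call MatAssemblyBegin()/MatAssemblyEnd(), and eventually MatDestroy(&A))
*/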

PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);

  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = 0;
  b->garray      = 0;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSP/CUSPARSE classes */
  b->spptr = NULL;

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based.

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix.

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying arrays. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
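/*
   Example (editorial sketch, not part of the manual page above): calling
   MatCreateMPIAIJWithSplitArrays() with caller-owned CSR arrays i,j,a for the "diagonal" block
   and oi,oj,oa for the "off-diagonal" block. The arrays and the local sizes m and n are
   placeholders, and the arrays must remain valid until after the matrix is destroyed.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,
                                           i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
     (use A, e.g. in MatMult(); then MatDestroy(&A) and only afterwards free the arrays)
*/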
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[],PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A      = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa    = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B      = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}