1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/vecscatterimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /*MC 10 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 11 12 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 13 and MATMPIAIJ otherwise. As a result, for single process communicators, 14 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 15 for communicators controlling multiple processes. It is recommended that you call both of 16 the above preallocation routines for simplicity. 17 18 Options Database Keys: 19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 20 21 Developer Notes: 22 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 23 enough exist. 24 25 Level: beginner 26 27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 28 M*/ 29 30 /*MC 31 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 32 33 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 34 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 35 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 36 for communicators controlling multiple processes. It is recommended that you call both of 37 the above preallocation routines for simplicity. 38 39 Options Database Keys: 40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 41 42 Level: beginner 43 44 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 45 M*/ 46 47 static PetscErrorCode MatPinToCPU_MPIAIJ(Mat A,PetscBool flg) 48 { 49 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 50 PetscErrorCode ierr; 51 52 PetscFunctionBegin; 53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL) 54 A->pinnedtocpu = flg; 55 #endif 56 if (a->A) { 57 ierr = MatPinToCPU(a->A,flg);CHKERRQ(ierr); 58 } 59 if (a->B) { 60 ierr = MatPinToCPU(a->B,flg);CHKERRQ(ierr); 61 } 62 PetscFunctionReturn(0); 63 } 64 65 66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 67 { 68 PetscErrorCode ierr; 69 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 70 71 PetscFunctionBegin; 72 if (mat->A) { 73 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 74 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 75 } 76 PetscFunctionReturn(0); 77 } 78 79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 80 { 81 PetscErrorCode ierr; 82 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 83 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 84 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 85 const PetscInt *ia,*ib; 86 const MatScalar *aa,*bb; 87 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 88 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 89 90 PetscFunctionBegin; 91 *keptrows = 0; 92 ia = a->i; 93 ib = b->i; 94 for (i=0; i<m; i++) { 95 na = ia[i+1] - ia[i]; 96 nb = ib[i+1] - ib[i]; 97 if (!na && !nb) { 98 cnt++; 99 goto ok1; 100 } 101 aa = a->a + ia[i]; 102 for (j=0; j<na; j++) { 103 if (aa[j] != 0.0) goto ok1; 104 } 105 bb = b->a + ib[i]; 106 for (j=0; j <nb; j++) { 107 if (bb[j] != 0.0) goto ok1; 108 } 109 cnt++; 110 ok1:; 111 } 112 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 113 if (!n0rows) PetscFunctionReturn(0); 114 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 115 cnt = 0; 116 for (i=0; i<m; i++) { 117 na = ia[i+1] - ia[i]; 118 nb = ib[i+1] - ib[i]; 119 if (!na && !nb) continue; 120 aa = a->a + ia[i]; 121 for (j=0; j<na;j++) { 122 if (aa[j] != 0.0) { 123 rows[cnt++] = rstart + i; 124 goto ok2; 125 } 126 } 127 bb = b->a + ib[i]; 128 for (j=0; j<nb; j++) { 129 if (bb[j] != 0.0) { 130 rows[cnt++] = rstart + i; 131 goto ok2; 132 } 133 } 134 ok2:; 135 } 136 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 141 { 142 PetscErrorCode ierr; 143 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 144 PetscBool cong; 145 146 PetscFunctionBegin; 147 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 148 if (Y->assembled && cong) { 149 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 150 } else { 151 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 152 } 153 PetscFunctionReturn(0); 154 } 155 156 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 157 { 158 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 159 PetscErrorCode ierr; 160 PetscInt i,rstart,nrows,*rows; 161 162 PetscFunctionBegin; 163 *zrows = NULL; 164 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 165 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 166 for (i=0; i<nrows; i++) rows[i] += rstart; 167 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 168 PetscFunctionReturn(0); 169 } 170 171 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 172 { 173 PetscErrorCode ierr; 174 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 175 PetscInt i,n,*garray = aij->garray; 176 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 177 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 178 PetscReal *work; 179 180 PetscFunctionBegin; 181 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 182 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 183 if (type == NORM_2) { 184 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 185 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 186 } 187 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 188 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 189 } 190 } else if (type == NORM_1) { 191 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 192 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 193 } 194 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 195 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 196 } 197 } else if (type == NORM_INFINITY) { 198 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 199 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 200 } 201 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 202 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 203 } 204 205 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 206 if (type == NORM_INFINITY) { 207 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 208 } else { 209 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 210 } 211 ierr = PetscFree(work);CHKERRQ(ierr); 212 if (type == NORM_2) { 213 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 214 } 215 PetscFunctionReturn(0); 216 } 217 218 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 219 { 220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 221 IS sis,gis; 222 PetscErrorCode ierr; 223 const PetscInt *isis,*igis; 224 PetscInt n,*iis,nsis,ngis,rstart,i; 225 226 PetscFunctionBegin; 227 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 228 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 229 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 230 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 231 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 232 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 233 234 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 235 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 236 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 237 n = ngis + nsis; 238 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 239 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 240 for (i=0; i<n; i++) iis[i] += rstart; 241 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 242 243 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 244 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 245 ierr = ISDestroy(&sis);CHKERRQ(ierr); 246 ierr = ISDestroy(&gis);CHKERRQ(ierr); 247 PetscFunctionReturn(0); 248 } 249 250 /* 251 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 252 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 253 254 Only for square matrices 255 256 Used by a preconditioner, hence PETSC_EXTERN 257 */ 258 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 259 { 260 PetscMPIInt rank,size; 261 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 262 PetscErrorCode ierr; 263 Mat mat; 264 Mat_SeqAIJ *gmata; 265 PetscMPIInt tag; 266 MPI_Status status; 267 PetscBool aij; 268 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 269 270 PetscFunctionBegin; 271 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 272 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 273 if (!rank) { 274 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 275 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 276 } 277 if (reuse == MAT_INITIAL_MATRIX) { 278 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 279 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 280 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 281 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 282 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 283 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 284 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 285 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 286 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 287 288 rowners[0] = 0; 289 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 290 rstart = rowners[rank]; 291 rend = rowners[rank+1]; 292 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 293 if (!rank) { 294 gmata = (Mat_SeqAIJ*) gmat->data; 295 /* send row lengths to all processors */ 296 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 297 for (i=1; i<size; i++) { 298 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 299 } 300 /* determine number diagonal and off-diagonal counts */ 301 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 302 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 303 jj = 0; 304 for (i=0; i<m; i++) { 305 for (j=0; j<dlens[i]; j++) { 306 if (gmata->j[jj] < rstart) ld[i]++; 307 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 308 jj++; 309 } 310 } 311 /* send column indices to other processes */ 312 for (i=1; i<size; i++) { 313 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 314 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 315 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 316 } 317 318 /* send numerical values to other processes */ 319 for (i=1; i<size; i++) { 320 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 321 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 322 } 323 gmataa = gmata->a; 324 gmataj = gmata->j; 325 326 } else { 327 /* receive row lengths */ 328 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 329 /* receive column indices */ 330 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 331 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 332 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 333 /* determine number diagonal and off-diagonal counts */ 334 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 335 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 336 jj = 0; 337 for (i=0; i<m; i++) { 338 for (j=0; j<dlens[i]; j++) { 339 if (gmataj[jj] < rstart) ld[i]++; 340 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 341 jj++; 342 } 343 } 344 /* receive numerical values */ 345 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 346 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 347 } 348 /* set preallocation */ 349 for (i=0; i<m; i++) { 350 dlens[i] -= olens[i]; 351 } 352 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 354 355 for (i=0; i<m; i++) { 356 dlens[i] += olens[i]; 357 } 358 cnt = 0; 359 for (i=0; i<m; i++) { 360 row = rstart + i; 361 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 362 cnt += dlens[i]; 363 } 364 if (rank) { 365 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 366 } 367 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 368 ierr = PetscFree(rowners);CHKERRQ(ierr); 369 370 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 371 372 *inmat = mat; 373 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 374 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 375 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 376 mat = *inmat; 377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 378 if (!rank) { 379 /* send numerical values to other processes */ 380 gmata = (Mat_SeqAIJ*) gmat->data; 381 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 382 gmataa = gmata->a; 383 for (i=1; i<size; i++) { 384 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 385 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 386 } 387 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 388 } else { 389 /* receive numerical values from process 0*/ 390 nz = Ad->nz + Ao->nz; 391 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 392 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 393 } 394 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 395 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 396 ad = Ad->a; 397 ao = Ao->a; 398 if (mat->rmap->n) { 399 i = 0; 400 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 401 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 402 } 403 for (i=1; i<mat->rmap->n; i++) { 404 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 405 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 406 } 407 i--; 408 if (mat->rmap->n) { 409 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 410 } 411 if (rank) { 412 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 413 } 414 } 415 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 416 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 417 PetscFunctionReturn(0); 418 } 419 420 /* 421 Local utility routine that creates a mapping from the global column 422 number to the local number in the off-diagonal part of the local 423 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 424 a slightly higher hash table cost; without it it is not scalable (each processor 425 has an order N integer array but is fast to acess. 426 */ 427 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 428 { 429 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 430 PetscErrorCode ierr; 431 PetscInt n = aij->B->cmap->n,i; 432 433 PetscFunctionBegin; 434 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 435 #if defined(PETSC_USE_CTABLE) 436 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 437 for (i=0; i<n; i++) { 438 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 439 } 440 #else 441 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 442 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 443 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 444 #endif 445 PetscFunctionReturn(0); 446 } 447 448 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 449 { \ 450 if (col <= lastcol1) low1 = 0; \ 451 else high1 = nrow1; \ 452 lastcol1 = col;\ 453 while (high1-low1 > 5) { \ 454 t = (low1+high1)/2; \ 455 if (rp1[t] > col) high1 = t; \ 456 else low1 = t; \ 457 } \ 458 for (_i=low1; _i<high1; _i++) { \ 459 if (rp1[_i] > col) break; \ 460 if (rp1[_i] == col) { \ 461 if (addv == ADD_VALUES) { \ 462 ap1[_i] += value; \ 463 /* Not sure LogFlops will slow dow the code or not */ \ 464 (void)PetscLogFlops(1.0); \ 465 } \ 466 else ap1[_i] = value; \ 467 goto a_noinsert; \ 468 } \ 469 } \ 470 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 471 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 472 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 473 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 474 N = nrow1++ - 1; a->nz++; high1++; \ 475 /* shift up all the later entries in this row */ \ 476 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 477 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 478 rp1[_i] = col; \ 479 ap1[_i] = value; \ 480 A->nonzerostate++;\ 481 a_noinsert: ; \ 482 ailen[row] = nrow1; \ 483 } 484 485 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 486 { \ 487 if (col <= lastcol2) low2 = 0; \ 488 else high2 = nrow2; \ 489 lastcol2 = col; \ 490 while (high2-low2 > 5) { \ 491 t = (low2+high2)/2; \ 492 if (rp2[t] > col) high2 = t; \ 493 else low2 = t; \ 494 } \ 495 for (_i=low2; _i<high2; _i++) { \ 496 if (rp2[_i] > col) break; \ 497 if (rp2[_i] == col) { \ 498 if (addv == ADD_VALUES) { \ 499 ap2[_i] += value; \ 500 (void)PetscLogFlops(1.0); \ 501 } \ 502 else ap2[_i] = value; \ 503 goto b_noinsert; \ 504 } \ 505 } \ 506 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 507 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 508 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 509 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 510 N = nrow2++ - 1; b->nz++; high2++; \ 511 /* shift up all the later entries in this row */ \ 512 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 513 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 514 rp2[_i] = col; \ 515 ap2[_i] = value; \ 516 B->nonzerostate++; \ 517 b_noinsert: ; \ 518 bilen[row] = nrow2; \ 519 } 520 521 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 522 { 523 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 524 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 525 PetscErrorCode ierr; 526 PetscInt l,*garray = mat->garray,diag; 527 528 PetscFunctionBegin; 529 /* code only works for square matrices A */ 530 531 /* find size of row to the left of the diagonal part */ 532 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 533 row = row - diag; 534 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 535 if (garray[b->j[b->i[row]+l]] > diag) break; 536 } 537 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 538 539 /* diagonal part */ 540 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 541 542 /* right of diagonal part */ 543 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 548 { 549 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 550 PetscScalar value = 0.0; 551 PetscErrorCode ierr; 552 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 553 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 554 PetscBool roworiented = aij->roworiented; 555 556 /* Some Variables required in the macro */ 557 Mat A = aij->A; 558 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 559 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 560 MatScalar *aa = a->a; 561 PetscBool ignorezeroentries = a->ignorezeroentries; 562 Mat B = aij->B; 563 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 564 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 565 MatScalar *ba = b->a; 566 567 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 568 PetscInt nonew; 569 MatScalar *ap1,*ap2; 570 571 PetscFunctionBegin; 572 for (i=0; i<m; i++) { 573 if (im[i] < 0) continue; 574 #if defined(PETSC_USE_DEBUG) 575 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 576 #endif 577 if (im[i] >= rstart && im[i] < rend) { 578 row = im[i] - rstart; 579 lastcol1 = -1; 580 rp1 = aj + ai[row]; 581 ap1 = aa + ai[row]; 582 rmax1 = aimax[row]; 583 nrow1 = ailen[row]; 584 low1 = 0; 585 high1 = nrow1; 586 lastcol2 = -1; 587 rp2 = bj + bi[row]; 588 ap2 = ba + bi[row]; 589 rmax2 = bimax[row]; 590 nrow2 = bilen[row]; 591 low2 = 0; 592 high2 = nrow2; 593 594 for (j=0; j<n; j++) { 595 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 596 if (in[j] >= cstart && in[j] < cend) { 597 col = in[j] - cstart; 598 nonew = a->nonew; 599 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 600 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 601 } else if (in[j] < 0) continue; 602 #if defined(PETSC_USE_DEBUG) 603 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 604 #endif 605 else { 606 if (mat->was_assembled) { 607 if (!aij->colmap) { 608 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 609 } 610 #if defined(PETSC_USE_CTABLE) 611 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 612 col--; 613 #else 614 col = aij->colmap[in[j]] - 1; 615 #endif 616 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 617 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 618 col = in[j]; 619 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 620 B = aij->B; 621 b = (Mat_SeqAIJ*)B->data; 622 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 623 rp2 = bj + bi[row]; 624 ap2 = ba + bi[row]; 625 rmax2 = bimax[row]; 626 nrow2 = bilen[row]; 627 low2 = 0; 628 high2 = nrow2; 629 bm = aij->B->rmap->n; 630 ba = b->a; 631 } else if (col < 0) { 632 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 633 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 634 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 635 } 636 } else col = in[j]; 637 nonew = b->nonew; 638 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 639 } 640 } 641 } else { 642 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 643 if (!aij->donotstash) { 644 mat->assembled = PETSC_FALSE; 645 if (roworiented) { 646 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 647 } else { 648 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 649 } 650 } 651 } 652 } 653 PetscFunctionReturn(0); 654 } 655 656 /* 657 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 658 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 659 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 660 */ 661 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 662 { 663 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 664 Mat A = aij->A; /* diagonal part of the matrix */ 665 Mat B = aij->B; /* offdiagonal part of the matrix */ 666 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 667 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 668 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 669 PetscInt *ailen = a->ilen,*aj = a->j; 670 PetscInt *bilen = b->ilen,*bj = b->j; 671 PetscInt am = aij->A->rmap->n,j; 672 PetscInt diag_so_far = 0,dnz; 673 PetscInt offd_so_far = 0,onz; 674 675 PetscFunctionBegin; 676 /* Iterate over all rows of the matrix */ 677 for (j=0; j<am; j++) { 678 dnz = onz = 0; 679 /* Iterate over all non-zero columns of the current row */ 680 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 681 /* If column is in the diagonal */ 682 if (mat_j[col] >= cstart && mat_j[col] < cend) { 683 aj[diag_so_far++] = mat_j[col] - cstart; 684 dnz++; 685 } else { /* off-diagonal entries */ 686 bj[offd_so_far++] = mat_j[col]; 687 onz++; 688 } 689 } 690 ailen[j] = dnz; 691 bilen[j] = onz; 692 } 693 PetscFunctionReturn(0); 694 } 695 696 /* 697 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 698 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 699 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 700 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 701 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 702 */ 703 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 704 { 705 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 706 Mat A = aij->A; /* diagonal part of the matrix */ 707 Mat B = aij->B; /* offdiagonal part of the matrix */ 708 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 709 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 710 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 711 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 712 PetscInt *ailen = a->ilen,*aj = a->j; 713 PetscInt *bilen = b->ilen,*bj = b->j; 714 PetscInt am = aij->A->rmap->n,j; 715 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 716 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 717 PetscScalar *aa = a->a,*ba = b->a; 718 719 PetscFunctionBegin; 720 /* Iterate over all rows of the matrix */ 721 for (j=0; j<am; j++) { 722 dnz_row = onz_row = 0; 723 rowstart_offd = full_offd_i[j]; 724 rowstart_diag = full_diag_i[j]; 725 /* Iterate over all non-zero columns of the current row */ 726 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 727 /* If column is in the diagonal */ 728 if (mat_j[col] >= cstart && mat_j[col] < cend) { 729 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 730 aa[rowstart_diag+dnz_row] = mat_a[col]; 731 dnz_row++; 732 } else { /* off-diagonal entries */ 733 bj[rowstart_offd+onz_row] = mat_j[col]; 734 ba[rowstart_offd+onz_row] = mat_a[col]; 735 onz_row++; 736 } 737 } 738 ailen[j] = dnz_row; 739 bilen[j] = onz_row; 740 } 741 PetscFunctionReturn(0); 742 } 743 744 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 745 { 746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 747 PetscErrorCode ierr; 748 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 749 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 750 751 PetscFunctionBegin; 752 for (i=0; i<m; i++) { 753 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 754 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 755 if (idxm[i] >= rstart && idxm[i] < rend) { 756 row = idxm[i] - rstart; 757 for (j=0; j<n; j++) { 758 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 759 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 760 if (idxn[j] >= cstart && idxn[j] < cend) { 761 col = idxn[j] - cstart; 762 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 763 } else { 764 if (!aij->colmap) { 765 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 766 } 767 #if defined(PETSC_USE_CTABLE) 768 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 769 col--; 770 #else 771 col = aij->colmap[idxn[j]] - 1; 772 #endif 773 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 774 else { 775 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 776 } 777 } 778 } 779 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 780 } 781 PetscFunctionReturn(0); 782 } 783 784 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 785 786 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 787 { 788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 789 PetscErrorCode ierr; 790 PetscInt nstash,reallocs; 791 792 PetscFunctionBegin; 793 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 794 795 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 796 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 797 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 802 { 803 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 804 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 805 PetscErrorCode ierr; 806 PetscMPIInt n; 807 PetscInt i,j,rstart,ncols,flg; 808 PetscInt *row,*col; 809 PetscBool other_disassembled; 810 PetscScalar *val; 811 812 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 813 814 PetscFunctionBegin; 815 if (!aij->donotstash && !mat->nooffprocentries) { 816 while (1) { 817 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 818 if (!flg) break; 819 820 for (i=0; i<n; ) { 821 /* Now identify the consecutive vals belonging to the same row */ 822 for (j=i,rstart=row[j]; j<n; j++) { 823 if (row[j] != rstart) break; 824 } 825 if (j < n) ncols = j-i; 826 else ncols = n-i; 827 /* Now assemble all these values with a single function call */ 828 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 829 830 i = j; 831 } 832 } 833 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 834 } 835 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 836 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 837 #endif 838 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 839 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 840 841 /* determine if any processor has disassembled, if so we must 842 also disassemble ourself, in order that we may reassemble. */ 843 /* 844 if nonzero structure of submatrix B cannot change then we know that 845 no processor disassembled thus we can skip this stuff 846 */ 847 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 848 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 849 if (mat->was_assembled && !other_disassembled) { 850 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 851 aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 852 #endif 853 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 854 } 855 } 856 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 857 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 858 } 859 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 862 #endif 863 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 864 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 865 866 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 867 868 aij->rowvalues = 0; 869 870 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 871 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 872 873 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 874 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 875 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 876 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 877 } 878 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 879 mat->offloadmask = PETSC_OFFLOAD_BOTH; 880 #endif 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 885 { 886 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 887 PetscErrorCode ierr; 888 889 PetscFunctionBegin; 890 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 891 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 892 PetscFunctionReturn(0); 893 } 894 895 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 896 { 897 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 898 PetscObjectState sA, sB; 899 PetscInt *lrows; 900 PetscInt r, len; 901 PetscBool cong, lch, gch; 902 PetscErrorCode ierr; 903 904 PetscFunctionBegin; 905 /* get locally owned rows */ 906 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 907 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 908 /* fix right hand side if needed */ 909 if (x && b) { 910 const PetscScalar *xx; 911 PetscScalar *bb; 912 913 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 914 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 915 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 916 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 917 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 918 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 919 } 920 921 sA = mat->A->nonzerostate; 922 sB = mat->B->nonzerostate; 923 924 if (diag != 0.0 && cong) { 925 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 926 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 927 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 928 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 929 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 930 PetscInt nnwA, nnwB; 931 PetscBool nnzA, nnzB; 932 933 nnwA = aijA->nonew; 934 nnwB = aijB->nonew; 935 nnzA = aijA->keepnonzeropattern; 936 nnzB = aijB->keepnonzeropattern; 937 if (!nnzA) { 938 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 939 aijA->nonew = 0; 940 } 941 if (!nnzB) { 942 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 943 aijB->nonew = 0; 944 } 945 /* Must zero here before the next loop */ 946 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 947 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 948 for (r = 0; r < len; ++r) { 949 const PetscInt row = lrows[r] + A->rmap->rstart; 950 if (row >= A->cmap->N) continue; 951 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 952 } 953 aijA->nonew = nnwA; 954 aijB->nonew = nnwB; 955 } else { 956 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 957 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 958 } 959 ierr = PetscFree(lrows);CHKERRQ(ierr); 960 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 961 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 962 963 /* reduce nonzerostate */ 964 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 965 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 966 if (gch) A->nonzerostate++; 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 971 { 972 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 973 PetscErrorCode ierr; 974 PetscMPIInt n = A->rmap->n; 975 PetscInt i,j,r,m,p = 0,len = 0; 976 PetscInt *lrows,*owners = A->rmap->range; 977 PetscSFNode *rrows; 978 PetscSF sf; 979 const PetscScalar *xx; 980 PetscScalar *bb,*mask; 981 Vec xmask,lmask; 982 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 983 const PetscInt *aj, *ii,*ridx; 984 PetscScalar *aa; 985 986 PetscFunctionBegin; 987 /* Create SF where leaves are input rows and roots are owned rows */ 988 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 989 for (r = 0; r < n; ++r) lrows[r] = -1; 990 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 991 for (r = 0; r < N; ++r) { 992 const PetscInt idx = rows[r]; 993 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 994 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 995 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 996 } 997 rrows[r].rank = p; 998 rrows[r].index = rows[r] - owners[p]; 999 } 1000 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 1001 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 1002 /* Collect flags for rows to be zeroed */ 1003 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1004 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 1005 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1006 /* Compress and put in row numbers */ 1007 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 1008 /* zero diagonal part of matrix */ 1009 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 1010 /* handle off diagonal part of matrix */ 1011 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 1012 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 1013 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 1014 for (i=0; i<len; i++) bb[lrows[i]] = 1; 1015 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 1016 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1017 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1018 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1019 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1020 PetscBool cong; 1021 1022 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1023 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1024 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1027 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1028 } 1029 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1030 /* remove zeroed rows of off diagonal matrix */ 1031 ii = aij->i; 1032 for (i=0; i<len; i++) { 1033 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1034 } 1035 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1036 if (aij->compressedrow.use) { 1037 m = aij->compressedrow.nrows; 1038 ii = aij->compressedrow.i; 1039 ridx = aij->compressedrow.rindex; 1040 for (i=0; i<m; i++) { 1041 n = ii[i+1] - ii[i]; 1042 aj = aij->j + ii[i]; 1043 aa = aij->a + ii[i]; 1044 1045 for (j=0; j<n; j++) { 1046 if (PetscAbsScalar(mask[*aj])) { 1047 if (b) bb[*ridx] -= *aa*xx[*aj]; 1048 *aa = 0.0; 1049 } 1050 aa++; 1051 aj++; 1052 } 1053 ridx++; 1054 } 1055 } else { /* do not use compressed row format */ 1056 m = l->B->rmap->n; 1057 for (i=0; i<m; i++) { 1058 n = ii[i+1] - ii[i]; 1059 aj = aij->j + ii[i]; 1060 aa = aij->a + ii[i]; 1061 for (j=0; j<n; j++) { 1062 if (PetscAbsScalar(mask[*aj])) { 1063 if (b) bb[i] -= *aa*xx[*aj]; 1064 *aa = 0.0; 1065 } 1066 aa++; 1067 aj++; 1068 } 1069 } 1070 } 1071 if (x && b) { 1072 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1073 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1074 } 1075 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1076 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1077 ierr = PetscFree(lrows);CHKERRQ(ierr); 1078 1079 /* only change matrix nonzero state if pattern was allowed to be changed */ 1080 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1081 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1082 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1083 } 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 PetscInt nt; 1092 VecScatter Mvctx = a->Mvctx; 1093 1094 PetscFunctionBegin; 1095 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1096 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1097 1098 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1100 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1101 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1102 PetscFunctionReturn(0); 1103 } 1104 1105 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1106 { 1107 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1108 PetscErrorCode ierr; 1109 1110 PetscFunctionBegin; 1111 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1112 PetscFunctionReturn(0); 1113 } 1114 1115 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1116 { 1117 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1118 PetscErrorCode ierr; 1119 VecScatter Mvctx = a->Mvctx; 1120 1121 PetscFunctionBegin; 1122 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1123 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1124 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1125 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1126 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1131 { 1132 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1133 PetscErrorCode ierr; 1134 1135 PetscFunctionBegin; 1136 /* do nondiagonal part */ 1137 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1138 /* do local part */ 1139 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1140 /* add partial results together */ 1141 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1142 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1143 PetscFunctionReturn(0); 1144 } 1145 1146 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1147 { 1148 MPI_Comm comm; 1149 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1150 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1151 IS Me,Notme; 1152 PetscErrorCode ierr; 1153 PetscInt M,N,first,last,*notme,i; 1154 PetscBool lf; 1155 PetscMPIInt size; 1156 1157 PetscFunctionBegin; 1158 /* Easy test: symmetric diagonal block */ 1159 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1160 ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1161 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1162 if (!*f) PetscFunctionReturn(0); 1163 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1164 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1165 if (size == 1) PetscFunctionReturn(0); 1166 1167 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1168 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1169 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1170 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1171 for (i=0; i<first; i++) notme[i] = i; 1172 for (i=last; i<M; i++) notme[i-last+first] = i; 1173 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1174 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1175 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1176 Aoff = Aoffs[0]; 1177 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1178 Boff = Boffs[0]; 1179 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1180 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1181 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1182 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1183 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1184 ierr = PetscFree(notme);CHKERRQ(ierr); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1189 { 1190 PetscErrorCode ierr; 1191 1192 PetscFunctionBegin; 1193 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1198 { 1199 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1200 PetscErrorCode ierr; 1201 1202 PetscFunctionBegin; 1203 /* do nondiagonal part */ 1204 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1205 /* do local part */ 1206 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1207 /* add partial results together */ 1208 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1210 PetscFunctionReturn(0); 1211 } 1212 1213 /* 1214 This only works correctly for square matrices where the subblock A->A is the 1215 diagonal block 1216 */ 1217 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1218 { 1219 PetscErrorCode ierr; 1220 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1221 1222 PetscFunctionBegin; 1223 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1224 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1225 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1226 PetscFunctionReturn(0); 1227 } 1228 1229 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1230 { 1231 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1232 PetscErrorCode ierr; 1233 1234 PetscFunctionBegin; 1235 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1236 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1241 { 1242 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1243 PetscErrorCode ierr; 1244 1245 PetscFunctionBegin; 1246 #if defined(PETSC_USE_LOG) 1247 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1248 #endif 1249 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1250 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1251 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1252 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1253 #if defined(PETSC_USE_CTABLE) 1254 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1255 #else 1256 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1257 #endif 1258 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1259 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1260 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1261 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1262 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1263 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1264 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1265 1266 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1267 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1268 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1269 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1270 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1271 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1272 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1273 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1274 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1275 #if defined(PETSC_HAVE_ELEMENTAL) 1276 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1277 #endif 1278 #if defined(PETSC_HAVE_HYPRE) 1279 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1280 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1281 #endif 1282 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1283 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1284 PetscFunctionReturn(0); 1285 } 1286 1287 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1288 { 1289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1290 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1291 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1292 PetscErrorCode ierr; 1293 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1294 int fd; 1295 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1296 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1297 PetscScalar *column_values; 1298 PetscInt message_count,flowcontrolcount; 1299 FILE *file; 1300 1301 PetscFunctionBegin; 1302 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1303 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1304 nz = A->nz + B->nz; 1305 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1306 if (!rank) { 1307 header[0] = MAT_FILE_CLASSID; 1308 header[1] = mat->rmap->N; 1309 header[2] = mat->cmap->N; 1310 1311 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1312 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1313 /* get largest number of rows any processor has */ 1314 rlen = mat->rmap->n; 1315 range = mat->rmap->range; 1316 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1317 } else { 1318 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1319 rlen = mat->rmap->n; 1320 } 1321 1322 /* load up the local row counts */ 1323 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1324 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1325 1326 /* store the row lengths to the file */ 1327 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1328 if (!rank) { 1329 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1330 for (i=1; i<size; i++) { 1331 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1332 rlen = range[i+1] - range[i]; 1333 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1334 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1335 } 1336 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1337 } else { 1338 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1339 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1340 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1341 } 1342 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1343 1344 /* load up the local column indices */ 1345 nzmax = nz; /* th processor needs space a largest processor needs */ 1346 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1347 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1348 cnt = 0; 1349 for (i=0; i<mat->rmap->n; i++) { 1350 for (j=B->i[i]; j<B->i[i+1]; j++) { 1351 if ((col = garray[B->j[j]]) > cstart) break; 1352 column_indices[cnt++] = col; 1353 } 1354 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1355 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1356 } 1357 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1358 1359 /* store the column indices to the file */ 1360 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1361 if (!rank) { 1362 MPI_Status status; 1363 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1364 for (i=1; i<size; i++) { 1365 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1366 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1367 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1368 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1369 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1370 } 1371 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1372 } else { 1373 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1374 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1375 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1376 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1377 } 1378 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1379 1380 /* load up the local column values */ 1381 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1382 cnt = 0; 1383 for (i=0; i<mat->rmap->n; i++) { 1384 for (j=B->i[i]; j<B->i[i+1]; j++) { 1385 if (garray[B->j[j]] > cstart) break; 1386 column_values[cnt++] = B->a[j]; 1387 } 1388 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1389 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1390 } 1391 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1392 1393 /* store the column values to the file */ 1394 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1395 if (!rank) { 1396 MPI_Status status; 1397 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1398 for (i=1; i<size; i++) { 1399 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1400 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1401 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1402 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1403 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1404 } 1405 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1406 } else { 1407 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1408 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1409 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1410 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1411 } 1412 ierr = PetscFree(column_values);CHKERRQ(ierr); 1413 1414 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1415 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1416 PetscFunctionReturn(0); 1417 } 1418 1419 #include <petscdraw.h> 1420 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1421 { 1422 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1423 PetscErrorCode ierr; 1424 PetscMPIInt rank = aij->rank,size = aij->size; 1425 PetscBool isdraw,iascii,isbinary; 1426 PetscViewer sviewer; 1427 PetscViewerFormat format; 1428 1429 PetscFunctionBegin; 1430 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1431 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1432 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1433 if (iascii) { 1434 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1435 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1436 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1437 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1438 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1439 for (i=0; i<(PetscInt)size; i++) { 1440 nmax = PetscMax(nmax,nz[i]); 1441 nmin = PetscMin(nmin,nz[i]); 1442 navg += nz[i]; 1443 } 1444 ierr = PetscFree(nz);CHKERRQ(ierr); 1445 navg = navg/size; 1446 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1447 PetscFunctionReturn(0); 1448 } 1449 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1450 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1451 MatInfo info; 1452 PetscBool inodes; 1453 1454 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1455 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1456 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1457 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1458 if (!inodes) { 1459 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1460 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1461 } else { 1462 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1463 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1464 } 1465 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1466 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1467 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1468 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1469 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1470 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1471 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1472 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1473 PetscFunctionReturn(0); 1474 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1475 PetscInt inodecount,inodelimit,*inodes; 1476 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1477 if (inodes) { 1478 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1479 } else { 1480 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1481 } 1482 PetscFunctionReturn(0); 1483 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1484 PetscFunctionReturn(0); 1485 } 1486 } else if (isbinary) { 1487 if (size == 1) { 1488 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1489 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1490 } else { 1491 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1492 } 1493 PetscFunctionReturn(0); 1494 } else if (iascii && size == 1) { 1495 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1496 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1497 PetscFunctionReturn(0); 1498 } else if (isdraw) { 1499 PetscDraw draw; 1500 PetscBool isnull; 1501 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1502 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1503 if (isnull) PetscFunctionReturn(0); 1504 } 1505 1506 { /* assemble the entire matrix onto first processor */ 1507 Mat A = NULL, Av; 1508 IS isrow,iscol; 1509 1510 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1511 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1512 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1513 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1514 /* The commented code uses MatCreateSubMatrices instead */ 1515 /* 1516 Mat *AA, A = NULL, Av; 1517 IS isrow,iscol; 1518 1519 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1520 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1521 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1522 if (!rank) { 1523 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1524 A = AA[0]; 1525 Av = AA[0]; 1526 } 1527 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1528 */ 1529 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1530 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1531 /* 1532 Everyone has to call to draw the matrix since the graphics waits are 1533 synchronized across all processors that share the PetscDraw object 1534 */ 1535 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1536 if (!rank) { 1537 if (((PetscObject)mat)->name) { 1538 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1539 } 1540 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1541 } 1542 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1543 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1544 ierr = MatDestroy(&A);CHKERRQ(ierr); 1545 } 1546 PetscFunctionReturn(0); 1547 } 1548 1549 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1550 { 1551 PetscErrorCode ierr; 1552 PetscBool iascii,isdraw,issocket,isbinary; 1553 1554 PetscFunctionBegin; 1555 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1556 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1557 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1558 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1559 if (iascii || isdraw || isbinary || issocket) { 1560 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1561 } 1562 PetscFunctionReturn(0); 1563 } 1564 1565 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1566 { 1567 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1568 PetscErrorCode ierr; 1569 Vec bb1 = 0; 1570 PetscBool hasop; 1571 1572 PetscFunctionBegin; 1573 if (flag == SOR_APPLY_UPPER) { 1574 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1575 PetscFunctionReturn(0); 1576 } 1577 1578 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1579 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1580 } 1581 1582 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1583 if (flag & SOR_ZERO_INITIAL_GUESS) { 1584 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1585 its--; 1586 } 1587 1588 while (its--) { 1589 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1590 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1591 1592 /* update rhs: bb1 = bb - B*x */ 1593 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1594 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1595 1596 /* local sweep */ 1597 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1598 } 1599 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1600 if (flag & SOR_ZERO_INITIAL_GUESS) { 1601 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1602 its--; 1603 } 1604 while (its--) { 1605 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1606 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1607 1608 /* update rhs: bb1 = bb - B*x */ 1609 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1610 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1611 1612 /* local sweep */ 1613 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1614 } 1615 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1616 if (flag & SOR_ZERO_INITIAL_GUESS) { 1617 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1618 its--; 1619 } 1620 while (its--) { 1621 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1622 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1623 1624 /* update rhs: bb1 = bb - B*x */ 1625 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1626 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1627 1628 /* local sweep */ 1629 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1630 } 1631 } else if (flag & SOR_EISENSTAT) { 1632 Vec xx1; 1633 1634 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1635 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1636 1637 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1638 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1639 if (!mat->diag) { 1640 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1641 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1642 } 1643 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1644 if (hasop) { 1645 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1646 } else { 1647 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1648 } 1649 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1650 1651 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1652 1653 /* local sweep */ 1654 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1655 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1656 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1657 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1658 1659 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1660 1661 matin->factorerrortype = mat->A->factorerrortype; 1662 PetscFunctionReturn(0); 1663 } 1664 1665 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1666 { 1667 Mat aA,aB,Aperm; 1668 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1669 PetscScalar *aa,*ba; 1670 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1671 PetscSF rowsf,sf; 1672 IS parcolp = NULL; 1673 PetscBool done; 1674 PetscErrorCode ierr; 1675 1676 PetscFunctionBegin; 1677 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1678 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1679 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1680 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1681 1682 /* Invert row permutation to find out where my rows should go */ 1683 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1684 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1685 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1686 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1687 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1688 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1689 1690 /* Invert column permutation to find out where my columns should go */ 1691 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1692 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1693 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1694 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1695 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1696 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1697 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1698 1699 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1700 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1701 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1702 1703 /* Find out where my gcols should go */ 1704 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1705 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1706 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1707 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1708 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1709 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1710 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1711 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1712 1713 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1714 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1715 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1716 for (i=0; i<m; i++) { 1717 PetscInt row = rdest[i],rowner; 1718 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1719 for (j=ai[i]; j<ai[i+1]; j++) { 1720 PetscInt cowner,col = cdest[aj[j]]; 1721 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1722 if (rowner == cowner) dnnz[i]++; 1723 else onnz[i]++; 1724 } 1725 for (j=bi[i]; j<bi[i+1]; j++) { 1726 PetscInt cowner,col = gcdest[bj[j]]; 1727 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1728 if (rowner == cowner) dnnz[i]++; 1729 else onnz[i]++; 1730 } 1731 } 1732 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1733 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1734 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1735 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1736 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1737 1738 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1739 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1740 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1741 for (i=0; i<m; i++) { 1742 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1743 PetscInt j0,rowlen; 1744 rowlen = ai[i+1] - ai[i]; 1745 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1746 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1747 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1748 } 1749 rowlen = bi[i+1] - bi[i]; 1750 for (j0=j=0; j<rowlen; j0=j) { 1751 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1752 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1753 } 1754 } 1755 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1756 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1757 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1758 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1759 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1760 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1761 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1762 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1763 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1764 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1765 *B = Aperm; 1766 PetscFunctionReturn(0); 1767 } 1768 1769 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1770 { 1771 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1772 PetscErrorCode ierr; 1773 1774 PetscFunctionBegin; 1775 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1776 if (ghosts) *ghosts = aij->garray; 1777 PetscFunctionReturn(0); 1778 } 1779 1780 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1781 { 1782 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1783 Mat A = mat->A,B = mat->B; 1784 PetscErrorCode ierr; 1785 PetscLogDouble isend[5],irecv[5]; 1786 1787 PetscFunctionBegin; 1788 info->block_size = 1.0; 1789 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1790 1791 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1792 isend[3] = info->memory; isend[4] = info->mallocs; 1793 1794 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1795 1796 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1797 isend[3] += info->memory; isend[4] += info->mallocs; 1798 if (flag == MAT_LOCAL) { 1799 info->nz_used = isend[0]; 1800 info->nz_allocated = isend[1]; 1801 info->nz_unneeded = isend[2]; 1802 info->memory = isend[3]; 1803 info->mallocs = isend[4]; 1804 } else if (flag == MAT_GLOBAL_MAX) { 1805 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1806 1807 info->nz_used = irecv[0]; 1808 info->nz_allocated = irecv[1]; 1809 info->nz_unneeded = irecv[2]; 1810 info->memory = irecv[3]; 1811 info->mallocs = irecv[4]; 1812 } else if (flag == MAT_GLOBAL_SUM) { 1813 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1814 1815 info->nz_used = irecv[0]; 1816 info->nz_allocated = irecv[1]; 1817 info->nz_unneeded = irecv[2]; 1818 info->memory = irecv[3]; 1819 info->mallocs = irecv[4]; 1820 } 1821 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1822 info->fill_ratio_needed = 0; 1823 info->factor_mallocs = 0; 1824 PetscFunctionReturn(0); 1825 } 1826 1827 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1828 { 1829 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 switch (op) { 1834 case MAT_NEW_NONZERO_LOCATIONS: 1835 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1836 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1837 case MAT_KEEP_NONZERO_PATTERN: 1838 case MAT_NEW_NONZERO_LOCATION_ERR: 1839 case MAT_USE_INODES: 1840 case MAT_IGNORE_ZERO_ENTRIES: 1841 MatCheckPreallocated(A,1); 1842 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1843 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1844 break; 1845 case MAT_ROW_ORIENTED: 1846 MatCheckPreallocated(A,1); 1847 a->roworiented = flg; 1848 1849 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1850 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1851 break; 1852 case MAT_NEW_DIAGONALS: 1853 case MAT_SORTED_FULL: 1854 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1855 break; 1856 case MAT_IGNORE_OFF_PROC_ENTRIES: 1857 a->donotstash = flg; 1858 break; 1859 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1860 case MAT_SPD: 1861 case MAT_SYMMETRIC: 1862 case MAT_STRUCTURALLY_SYMMETRIC: 1863 case MAT_HERMITIAN: 1864 case MAT_SYMMETRY_ETERNAL: 1865 break; 1866 case MAT_SUBMAT_SINGLEIS: 1867 A->submat_singleis = flg; 1868 break; 1869 case MAT_STRUCTURE_ONLY: 1870 /* The option is handled directly by MatSetOption() */ 1871 break; 1872 default: 1873 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1879 { 1880 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1881 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1882 PetscErrorCode ierr; 1883 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1884 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1885 PetscInt *cmap,*idx_p; 1886 1887 PetscFunctionBegin; 1888 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1889 mat->getrowactive = PETSC_TRUE; 1890 1891 if (!mat->rowvalues && (idx || v)) { 1892 /* 1893 allocate enough space to hold information from the longest row. 1894 */ 1895 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1896 PetscInt max = 1,tmp; 1897 for (i=0; i<matin->rmap->n; i++) { 1898 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1899 if (max < tmp) max = tmp; 1900 } 1901 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1902 } 1903 1904 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1905 lrow = row - rstart; 1906 1907 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1908 if (!v) {pvA = 0; pvB = 0;} 1909 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1910 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1911 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1912 nztot = nzA + nzB; 1913 1914 cmap = mat->garray; 1915 if (v || idx) { 1916 if (nztot) { 1917 /* Sort by increasing column numbers, assuming A and B already sorted */ 1918 PetscInt imark = -1; 1919 if (v) { 1920 *v = v_p = mat->rowvalues; 1921 for (i=0; i<nzB; i++) { 1922 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1923 else break; 1924 } 1925 imark = i; 1926 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1927 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1928 } 1929 if (idx) { 1930 *idx = idx_p = mat->rowindices; 1931 if (imark > -1) { 1932 for (i=0; i<imark; i++) { 1933 idx_p[i] = cmap[cworkB[i]]; 1934 } 1935 } else { 1936 for (i=0; i<nzB; i++) { 1937 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1938 else break; 1939 } 1940 imark = i; 1941 } 1942 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1943 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1944 } 1945 } else { 1946 if (idx) *idx = 0; 1947 if (v) *v = 0; 1948 } 1949 } 1950 *nz = nztot; 1951 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1952 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1953 PetscFunctionReturn(0); 1954 } 1955 1956 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1957 { 1958 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1959 1960 PetscFunctionBegin; 1961 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1962 aij->getrowactive = PETSC_FALSE; 1963 PetscFunctionReturn(0); 1964 } 1965 1966 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1967 { 1968 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1969 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1970 PetscErrorCode ierr; 1971 PetscInt i,j,cstart = mat->cmap->rstart; 1972 PetscReal sum = 0.0; 1973 MatScalar *v; 1974 1975 PetscFunctionBegin; 1976 if (aij->size == 1) { 1977 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1978 } else { 1979 if (type == NORM_FROBENIUS) { 1980 v = amat->a; 1981 for (i=0; i<amat->nz; i++) { 1982 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1983 } 1984 v = bmat->a; 1985 for (i=0; i<bmat->nz; i++) { 1986 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1987 } 1988 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1989 *norm = PetscSqrtReal(*norm); 1990 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1991 } else if (type == NORM_1) { /* max column norm */ 1992 PetscReal *tmp,*tmp2; 1993 PetscInt *jj,*garray = aij->garray; 1994 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1995 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1996 *norm = 0.0; 1997 v = amat->a; jj = amat->j; 1998 for (j=0; j<amat->nz; j++) { 1999 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 2000 } 2001 v = bmat->a; jj = bmat->j; 2002 for (j=0; j<bmat->nz; j++) { 2003 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 2004 } 2005 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2006 for (j=0; j<mat->cmap->N; j++) { 2007 if (tmp2[j] > *norm) *norm = tmp2[j]; 2008 } 2009 ierr = PetscFree(tmp);CHKERRQ(ierr); 2010 ierr = PetscFree(tmp2);CHKERRQ(ierr); 2011 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2012 } else if (type == NORM_INFINITY) { /* max row norm */ 2013 PetscReal ntemp = 0.0; 2014 for (j=0; j<aij->A->rmap->n; j++) { 2015 v = amat->a + amat->i[j]; 2016 sum = 0.0; 2017 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2018 sum += PetscAbsScalar(*v); v++; 2019 } 2020 v = bmat->a + bmat->i[j]; 2021 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2022 sum += PetscAbsScalar(*v); v++; 2023 } 2024 if (sum > ntemp) ntemp = sum; 2025 } 2026 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2027 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2028 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2029 } 2030 PetscFunctionReturn(0); 2031 } 2032 2033 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2034 { 2035 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2036 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2037 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2038 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2039 PetscErrorCode ierr; 2040 Mat B,A_diag,*B_diag; 2041 const MatScalar *array; 2042 2043 PetscFunctionBegin; 2044 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2045 ai = Aloc->i; aj = Aloc->j; 2046 bi = Bloc->i; bj = Bloc->j; 2047 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2048 PetscInt *d_nnz,*g_nnz,*o_nnz; 2049 PetscSFNode *oloc; 2050 PETSC_UNUSED PetscSF sf; 2051 2052 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2053 /* compute d_nnz for preallocation */ 2054 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2055 for (i=0; i<ai[ma]; i++) { 2056 d_nnz[aj[i]]++; 2057 } 2058 /* compute local off-diagonal contributions */ 2059 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2060 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2061 /* map those to global */ 2062 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2063 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2064 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2065 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2066 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2067 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2068 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2069 2070 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2071 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2072 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2073 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2074 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2075 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2076 } else { 2077 B = *matout; 2078 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2079 } 2080 2081 b = (Mat_MPIAIJ*)B->data; 2082 A_diag = a->A; 2083 B_diag = &b->A; 2084 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2085 A_diag_ncol = A_diag->cmap->N; 2086 B_diag_ilen = sub_B_diag->ilen; 2087 B_diag_i = sub_B_diag->i; 2088 2089 /* Set ilen for diagonal of B */ 2090 for (i=0; i<A_diag_ncol; i++) { 2091 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2092 } 2093 2094 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2095 very quickly (=without using MatSetValues), because all writes are local. */ 2096 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2097 2098 /* copy over the B part */ 2099 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2100 array = Bloc->a; 2101 row = A->rmap->rstart; 2102 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2103 cols_tmp = cols; 2104 for (i=0; i<mb; i++) { 2105 ncol = bi[i+1]-bi[i]; 2106 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2107 row++; 2108 array += ncol; cols_tmp += ncol; 2109 } 2110 ierr = PetscFree(cols);CHKERRQ(ierr); 2111 2112 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2113 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2114 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2115 *matout = B; 2116 } else { 2117 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2118 } 2119 PetscFunctionReturn(0); 2120 } 2121 2122 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2123 { 2124 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2125 Mat a = aij->A,b = aij->B; 2126 PetscErrorCode ierr; 2127 PetscInt s1,s2,s3; 2128 2129 PetscFunctionBegin; 2130 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2131 if (rr) { 2132 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2133 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2134 /* Overlap communication with computation. */ 2135 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2136 } 2137 if (ll) { 2138 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2139 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2140 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2141 } 2142 /* scale the diagonal block */ 2143 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2144 2145 if (rr) { 2146 /* Do a scatter end and then right scale the off-diagonal block */ 2147 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2148 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2149 } 2150 PetscFunctionReturn(0); 2151 } 2152 2153 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2154 { 2155 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2156 PetscErrorCode ierr; 2157 2158 PetscFunctionBegin; 2159 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2164 { 2165 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2166 Mat a,b,c,d; 2167 PetscBool flg; 2168 PetscErrorCode ierr; 2169 2170 PetscFunctionBegin; 2171 a = matA->A; b = matA->B; 2172 c = matB->A; d = matB->B; 2173 2174 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2175 if (flg) { 2176 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2177 } 2178 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2179 PetscFunctionReturn(0); 2180 } 2181 2182 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2183 { 2184 PetscErrorCode ierr; 2185 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2186 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2187 2188 PetscFunctionBegin; 2189 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2190 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2191 /* because of the column compression in the off-processor part of the matrix a->B, 2192 the number of columns in a->B and b->B may be different, hence we cannot call 2193 the MatCopy() directly on the two parts. If need be, we can provide a more 2194 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2195 then copying the submatrices */ 2196 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2197 } else { 2198 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2199 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2200 } 2201 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2202 PetscFunctionReturn(0); 2203 } 2204 2205 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2206 { 2207 PetscErrorCode ierr; 2208 2209 PetscFunctionBegin; 2210 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /* 2215 Computes the number of nonzeros per row needed for preallocation when X and Y 2216 have different nonzero structure. 2217 */ 2218 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2219 { 2220 PetscInt i,j,k,nzx,nzy; 2221 2222 PetscFunctionBegin; 2223 /* Set the number of nonzeros in the new matrix */ 2224 for (i=0; i<m; i++) { 2225 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2226 nzx = xi[i+1] - xi[i]; 2227 nzy = yi[i+1] - yi[i]; 2228 nnz[i] = 0; 2229 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2230 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2231 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2232 nnz[i]++; 2233 } 2234 for (; k<nzy; k++) nnz[i]++; 2235 } 2236 PetscFunctionReturn(0); 2237 } 2238 2239 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2240 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2241 { 2242 PetscErrorCode ierr; 2243 PetscInt m = Y->rmap->N; 2244 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2245 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2246 2247 PetscFunctionBegin; 2248 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2249 PetscFunctionReturn(0); 2250 } 2251 2252 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2253 { 2254 PetscErrorCode ierr; 2255 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2256 PetscBLASInt bnz,one=1; 2257 Mat_SeqAIJ *x,*y; 2258 2259 PetscFunctionBegin; 2260 if (str == SAME_NONZERO_PATTERN) { 2261 PetscScalar alpha = a; 2262 x = (Mat_SeqAIJ*)xx->A->data; 2263 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2264 y = (Mat_SeqAIJ*)yy->A->data; 2265 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2266 x = (Mat_SeqAIJ*)xx->B->data; 2267 y = (Mat_SeqAIJ*)yy->B->data; 2268 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2269 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2270 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2271 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2272 will be updated */ 2273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2274 if (Y->offloadmask != PETSC_OFFLOAD_UNALLOCATED) { 2275 Y->offloadmask = PETSC_OFFLOAD_CPU; 2276 } 2277 #endif 2278 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2279 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2280 } else { 2281 Mat B; 2282 PetscInt *nnz_d,*nnz_o; 2283 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2284 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2285 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2286 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2287 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2288 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2289 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2290 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2291 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2292 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2293 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2294 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2295 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2296 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2297 } 2298 PetscFunctionReturn(0); 2299 } 2300 2301 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2302 2303 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2304 { 2305 #if defined(PETSC_USE_COMPLEX) 2306 PetscErrorCode ierr; 2307 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2308 2309 PetscFunctionBegin; 2310 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2311 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2312 #else 2313 PetscFunctionBegin; 2314 #endif 2315 PetscFunctionReturn(0); 2316 } 2317 2318 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2319 { 2320 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2321 PetscErrorCode ierr; 2322 2323 PetscFunctionBegin; 2324 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2325 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2326 PetscFunctionReturn(0); 2327 } 2328 2329 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2330 { 2331 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2332 PetscErrorCode ierr; 2333 2334 PetscFunctionBegin; 2335 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2336 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2341 { 2342 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2343 PetscErrorCode ierr; 2344 PetscInt i,*idxb = 0; 2345 PetscScalar *va,*vb; 2346 Vec vtmp; 2347 2348 PetscFunctionBegin; 2349 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2350 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2351 if (idx) { 2352 for (i=0; i<A->rmap->n; i++) { 2353 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2354 } 2355 } 2356 2357 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2358 if (idx) { 2359 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2360 } 2361 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2362 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2363 2364 for (i=0; i<A->rmap->n; i++) { 2365 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2366 va[i] = vb[i]; 2367 if (idx) idx[i] = a->garray[idxb[i]]; 2368 } 2369 } 2370 2371 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2372 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2373 ierr = PetscFree(idxb);CHKERRQ(ierr); 2374 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2379 { 2380 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2381 PetscErrorCode ierr; 2382 PetscInt i,*idxb = 0; 2383 PetscScalar *va,*vb; 2384 Vec vtmp; 2385 2386 PetscFunctionBegin; 2387 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2388 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2389 if (idx) { 2390 for (i=0; i<A->cmap->n; i++) { 2391 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2392 } 2393 } 2394 2395 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2396 if (idx) { 2397 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2398 } 2399 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2400 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2401 2402 for (i=0; i<A->rmap->n; i++) { 2403 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2404 va[i] = vb[i]; 2405 if (idx) idx[i] = a->garray[idxb[i]]; 2406 } 2407 } 2408 2409 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2410 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2411 ierr = PetscFree(idxb);CHKERRQ(ierr); 2412 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2417 { 2418 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2419 PetscInt n = A->rmap->n; 2420 PetscInt cstart = A->cmap->rstart; 2421 PetscInt *cmap = mat->garray; 2422 PetscInt *diagIdx, *offdiagIdx; 2423 Vec diagV, offdiagV; 2424 PetscScalar *a, *diagA, *offdiagA; 2425 PetscInt r; 2426 PetscErrorCode ierr; 2427 2428 PetscFunctionBegin; 2429 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2430 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2431 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2432 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2433 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2434 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2435 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2436 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2437 for (r = 0; r < n; ++r) { 2438 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2439 a[r] = diagA[r]; 2440 idx[r] = cstart + diagIdx[r]; 2441 } else { 2442 a[r] = offdiagA[r]; 2443 idx[r] = cmap[offdiagIdx[r]]; 2444 } 2445 } 2446 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2447 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2448 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2449 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2450 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2451 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2456 { 2457 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2458 PetscInt n = A->rmap->n; 2459 PetscInt cstart = A->cmap->rstart; 2460 PetscInt *cmap = mat->garray; 2461 PetscInt *diagIdx, *offdiagIdx; 2462 Vec diagV, offdiagV; 2463 PetscScalar *a, *diagA, *offdiagA; 2464 PetscInt r; 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2469 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2470 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2471 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2472 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2473 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2474 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2475 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2476 for (r = 0; r < n; ++r) { 2477 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2478 a[r] = diagA[r]; 2479 idx[r] = cstart + diagIdx[r]; 2480 } else { 2481 a[r] = offdiagA[r]; 2482 idx[r] = cmap[offdiagIdx[r]]; 2483 } 2484 } 2485 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2486 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2487 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2488 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2489 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2490 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2491 PetscFunctionReturn(0); 2492 } 2493 2494 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2495 { 2496 PetscErrorCode ierr; 2497 Mat *dummy; 2498 2499 PetscFunctionBegin; 2500 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2501 *newmat = *dummy; 2502 ierr = PetscFree(dummy);CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2507 { 2508 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2509 PetscErrorCode ierr; 2510 2511 PetscFunctionBegin; 2512 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2513 A->factorerrortype = a->A->factorerrortype; 2514 PetscFunctionReturn(0); 2515 } 2516 2517 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2518 { 2519 PetscErrorCode ierr; 2520 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2521 2522 PetscFunctionBegin; 2523 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2524 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2525 if (x->assembled) { 2526 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2527 } else { 2528 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2529 } 2530 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 PetscFunctionReturn(0); 2533 } 2534 2535 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2536 { 2537 PetscFunctionBegin; 2538 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2539 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2540 PetscFunctionReturn(0); 2541 } 2542 2543 /*@ 2544 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2545 2546 Collective on Mat 2547 2548 Input Parameters: 2549 + A - the matrix 2550 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2551 2552 Level: advanced 2553 2554 @*/ 2555 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2556 { 2557 PetscErrorCode ierr; 2558 2559 PetscFunctionBegin; 2560 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2565 { 2566 PetscErrorCode ierr; 2567 PetscBool sc = PETSC_FALSE,flg; 2568 2569 PetscFunctionBegin; 2570 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2571 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2572 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2573 if (flg) { 2574 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2575 } 2576 ierr = PetscOptionsTail();CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2581 { 2582 PetscErrorCode ierr; 2583 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2584 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2585 2586 PetscFunctionBegin; 2587 if (!Y->preallocated) { 2588 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2589 } else if (!aij->nz) { 2590 PetscInt nonew = aij->nonew; 2591 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2592 aij->nonew = nonew; 2593 } 2594 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2595 PetscFunctionReturn(0); 2596 } 2597 2598 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2599 { 2600 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2601 PetscErrorCode ierr; 2602 2603 PetscFunctionBegin; 2604 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2605 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2606 if (d) { 2607 PetscInt rstart; 2608 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2609 *d += rstart; 2610 2611 } 2612 PetscFunctionReturn(0); 2613 } 2614 2615 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2616 { 2617 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2618 PetscErrorCode ierr; 2619 2620 PetscFunctionBegin; 2621 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2622 PetscFunctionReturn(0); 2623 } 2624 2625 /* -------------------------------------------------------------------*/ 2626 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2627 MatGetRow_MPIAIJ, 2628 MatRestoreRow_MPIAIJ, 2629 MatMult_MPIAIJ, 2630 /* 4*/ MatMultAdd_MPIAIJ, 2631 MatMultTranspose_MPIAIJ, 2632 MatMultTransposeAdd_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 /*10*/ 0, 2637 0, 2638 0, 2639 MatSOR_MPIAIJ, 2640 MatTranspose_MPIAIJ, 2641 /*15*/ MatGetInfo_MPIAIJ, 2642 MatEqual_MPIAIJ, 2643 MatGetDiagonal_MPIAIJ, 2644 MatDiagonalScale_MPIAIJ, 2645 MatNorm_MPIAIJ, 2646 /*20*/ MatAssemblyBegin_MPIAIJ, 2647 MatAssemblyEnd_MPIAIJ, 2648 MatSetOption_MPIAIJ, 2649 MatZeroEntries_MPIAIJ, 2650 /*24*/ MatZeroRows_MPIAIJ, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*29*/ MatSetUp_MPIAIJ, 2656 0, 2657 0, 2658 MatGetDiagonalBlock_MPIAIJ, 2659 0, 2660 /*34*/ MatDuplicate_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*39*/ MatAXPY_MPIAIJ, 2666 MatCreateSubMatrices_MPIAIJ, 2667 MatIncreaseOverlap_MPIAIJ, 2668 MatGetValues_MPIAIJ, 2669 MatCopy_MPIAIJ, 2670 /*44*/ MatGetRowMax_MPIAIJ, 2671 MatScale_MPIAIJ, 2672 MatShift_MPIAIJ, 2673 MatDiagonalSet_MPIAIJ, 2674 MatZeroRowsColumns_MPIAIJ, 2675 /*49*/ MatSetRandom_MPIAIJ, 2676 0, 2677 0, 2678 0, 2679 0, 2680 /*54*/ MatFDColoringCreate_MPIXAIJ, 2681 0, 2682 MatSetUnfactored_MPIAIJ, 2683 MatPermute_MPIAIJ, 2684 0, 2685 /*59*/ MatCreateSubMatrix_MPIAIJ, 2686 MatDestroy_MPIAIJ, 2687 MatView_MPIAIJ, 2688 0, 2689 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2690 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2691 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2692 0, 2693 0, 2694 0, 2695 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2696 MatGetRowMinAbs_MPIAIJ, 2697 0, 2698 0, 2699 0, 2700 0, 2701 /*75*/ MatFDColoringApply_AIJ, 2702 MatSetFromOptions_MPIAIJ, 2703 0, 2704 0, 2705 MatFindZeroDiagonals_MPIAIJ, 2706 /*80*/ 0, 2707 0, 2708 0, 2709 /*83*/ MatLoad_MPIAIJ, 2710 MatIsSymmetric_MPIAIJ, 2711 0, 2712 0, 2713 0, 2714 0, 2715 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2716 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2717 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2718 MatPtAP_MPIAIJ_MPIAIJ, 2719 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2720 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2721 0, 2722 0, 2723 0, 2724 MatPinToCPU_MPIAIJ, 2725 /*99*/ 0, 2726 0, 2727 0, 2728 MatConjugate_MPIAIJ, 2729 0, 2730 /*104*/MatSetValuesRow_MPIAIJ, 2731 MatRealPart_MPIAIJ, 2732 MatImaginaryPart_MPIAIJ, 2733 0, 2734 0, 2735 /*109*/0, 2736 0, 2737 MatGetRowMin_MPIAIJ, 2738 0, 2739 MatMissingDiagonal_MPIAIJ, 2740 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2741 0, 2742 MatGetGhosts_MPIAIJ, 2743 0, 2744 0, 2745 /*119*/0, 2746 0, 2747 0, 2748 0, 2749 MatGetMultiProcBlock_MPIAIJ, 2750 /*124*/MatFindNonzeroRows_MPIAIJ, 2751 MatGetColumnNorms_MPIAIJ, 2752 MatInvertBlockDiagonal_MPIAIJ, 2753 MatInvertVariableBlockDiagonal_MPIAIJ, 2754 MatCreateSubMatricesMPI_MPIAIJ, 2755 /*129*/0, 2756 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2757 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2758 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2759 0, 2760 /*134*/0, 2761 0, 2762 MatRARt_MPIAIJ_MPIAIJ, 2763 0, 2764 0, 2765 /*139*/MatSetBlockSizes_MPIAIJ, 2766 0, 2767 0, 2768 MatFDColoringSetUp_MPIXAIJ, 2769 MatFindOffBlockDiagonalEntries_MPIAIJ, 2770 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2771 }; 2772 2773 /* ----------------------------------------------------------------------------------------*/ 2774 2775 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2776 { 2777 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2778 PetscErrorCode ierr; 2779 2780 PetscFunctionBegin; 2781 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2782 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2787 { 2788 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2789 PetscErrorCode ierr; 2790 2791 PetscFunctionBegin; 2792 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2793 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2798 { 2799 Mat_MPIAIJ *b; 2800 PetscErrorCode ierr; 2801 PetscMPIInt size; 2802 2803 PetscFunctionBegin; 2804 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2805 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2806 b = (Mat_MPIAIJ*)B->data; 2807 2808 #if defined(PETSC_USE_CTABLE) 2809 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2810 #else 2811 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2812 #endif 2813 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2814 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2815 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2816 2817 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2818 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2819 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2820 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2821 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2822 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2823 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2824 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2825 2826 if (!B->preallocated) { 2827 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2828 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2829 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2830 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2831 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2832 } 2833 2834 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2835 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2836 B->preallocated = PETSC_TRUE; 2837 B->was_assembled = PETSC_FALSE; 2838 B->assembled = PETSC_FALSE; 2839 PetscFunctionReturn(0); 2840 } 2841 2842 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2843 { 2844 Mat_MPIAIJ *b; 2845 PetscErrorCode ierr; 2846 2847 PetscFunctionBegin; 2848 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2849 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2850 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2851 b = (Mat_MPIAIJ*)B->data; 2852 2853 #if defined(PETSC_USE_CTABLE) 2854 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2855 #else 2856 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2857 #endif 2858 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2859 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2860 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2861 2862 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2863 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2864 B->preallocated = PETSC_TRUE; 2865 B->was_assembled = PETSC_FALSE; 2866 B->assembled = PETSC_FALSE; 2867 PetscFunctionReturn(0); 2868 } 2869 2870 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2871 { 2872 Mat mat; 2873 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2874 PetscErrorCode ierr; 2875 2876 PetscFunctionBegin; 2877 *newmat = 0; 2878 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2879 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2880 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2881 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2882 a = (Mat_MPIAIJ*)mat->data; 2883 2884 mat->factortype = matin->factortype; 2885 mat->assembled = PETSC_TRUE; 2886 mat->insertmode = NOT_SET_VALUES; 2887 mat->preallocated = PETSC_TRUE; 2888 2889 a->size = oldmat->size; 2890 a->rank = oldmat->rank; 2891 a->donotstash = oldmat->donotstash; 2892 a->roworiented = oldmat->roworiented; 2893 a->rowindices = 0; 2894 a->rowvalues = 0; 2895 a->getrowactive = PETSC_FALSE; 2896 2897 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2898 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2899 2900 if (oldmat->colmap) { 2901 #if defined(PETSC_USE_CTABLE) 2902 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2903 #else 2904 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2905 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2906 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2907 #endif 2908 } else a->colmap = 0; 2909 if (oldmat->garray) { 2910 PetscInt len; 2911 len = oldmat->B->cmap->n; 2912 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2913 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2914 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2915 } else a->garray = 0; 2916 2917 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2918 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2919 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2920 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2921 2922 if (oldmat->Mvctx_mpi1) { 2923 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2924 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2925 } 2926 2927 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2928 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2929 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2930 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2931 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2932 *newmat = mat; 2933 PetscFunctionReturn(0); 2934 } 2935 2936 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2937 { 2938 PetscBool isbinary, ishdf5; 2939 PetscErrorCode ierr; 2940 2941 PetscFunctionBegin; 2942 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2943 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2944 /* force binary viewer to load .info file if it has not yet done so */ 2945 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2946 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2947 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2948 if (isbinary) { 2949 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2950 } else if (ishdf5) { 2951 #if defined(PETSC_HAVE_HDF5) 2952 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2953 #else 2954 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2955 #endif 2956 } else { 2957 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2958 } 2959 PetscFunctionReturn(0); 2960 } 2961 2962 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2963 { 2964 PetscScalar *vals,*svals; 2965 MPI_Comm comm; 2966 PetscErrorCode ierr; 2967 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2968 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2969 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2970 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2971 PetscInt cend,cstart,n,*rowners; 2972 int fd; 2973 PetscInt bs = newMat->rmap->bs; 2974 2975 PetscFunctionBegin; 2976 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2977 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2978 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2979 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2980 if (!rank) { 2981 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2982 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2983 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2984 } 2985 2986 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2987 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2988 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2989 if (bs < 0) bs = 1; 2990 2991 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2992 M = header[1]; N = header[2]; 2993 2994 /* If global sizes are set, check if they are consistent with that given in the file */ 2995 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2996 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2997 2998 /* determine ownership of all (block) rows */ 2999 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 3000 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 3001 else m = newMat->rmap->n; /* Set by user */ 3002 3003 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 3004 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 3005 3006 /* First process needs enough room for process with most rows */ 3007 if (!rank) { 3008 mmax = rowners[1]; 3009 for (i=2; i<=size; i++) { 3010 mmax = PetscMax(mmax, rowners[i]); 3011 } 3012 } else mmax = -1; /* unused, but compilers complain */ 3013 3014 rowners[0] = 0; 3015 for (i=2; i<=size; i++) { 3016 rowners[i] += rowners[i-1]; 3017 } 3018 rstart = rowners[rank]; 3019 rend = rowners[rank+1]; 3020 3021 /* distribute row lengths to all processors */ 3022 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3023 if (!rank) { 3024 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3025 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3026 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3027 for (j=0; j<m; j++) { 3028 procsnz[0] += ourlens[j]; 3029 } 3030 for (i=1; i<size; i++) { 3031 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3032 /* calculate the number of nonzeros on each processor */ 3033 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3034 procsnz[i] += rowlengths[j]; 3035 } 3036 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3037 } 3038 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3039 } else { 3040 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3041 } 3042 3043 if (!rank) { 3044 /* determine max buffer needed and allocate it */ 3045 maxnz = 0; 3046 for (i=0; i<size; i++) { 3047 maxnz = PetscMax(maxnz,procsnz[i]); 3048 } 3049 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3050 3051 /* read in my part of the matrix column indices */ 3052 nz = procsnz[0]; 3053 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3054 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3055 3056 /* read in every one elses and ship off */ 3057 for (i=1; i<size; i++) { 3058 nz = procsnz[i]; 3059 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3060 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3061 } 3062 ierr = PetscFree(cols);CHKERRQ(ierr); 3063 } else { 3064 /* determine buffer space needed for message */ 3065 nz = 0; 3066 for (i=0; i<m; i++) { 3067 nz += ourlens[i]; 3068 } 3069 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3070 3071 /* receive message of column indices*/ 3072 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3073 } 3074 3075 /* determine column ownership if matrix is not square */ 3076 if (N != M) { 3077 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3078 else n = newMat->cmap->n; 3079 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3080 cstart = cend - n; 3081 } else { 3082 cstart = rstart; 3083 cend = rend; 3084 n = cend - cstart; 3085 } 3086 3087 /* loop over local rows, determining number of off diagonal entries */ 3088 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3089 jj = 0; 3090 for (i=0; i<m; i++) { 3091 for (j=0; j<ourlens[i]; j++) { 3092 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3093 jj++; 3094 } 3095 } 3096 3097 for (i=0; i<m; i++) { 3098 ourlens[i] -= offlens[i]; 3099 } 3100 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3101 3102 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3103 3104 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3105 3106 for (i=0; i<m; i++) { 3107 ourlens[i] += offlens[i]; 3108 } 3109 3110 if (!rank) { 3111 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3112 3113 /* read in my part of the matrix numerical values */ 3114 nz = procsnz[0]; 3115 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3116 3117 /* insert into matrix */ 3118 jj = rstart; 3119 smycols = mycols; 3120 svals = vals; 3121 for (i=0; i<m; i++) { 3122 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3123 smycols += ourlens[i]; 3124 svals += ourlens[i]; 3125 jj++; 3126 } 3127 3128 /* read in other processors and ship out */ 3129 for (i=1; i<size; i++) { 3130 nz = procsnz[i]; 3131 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3132 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3133 } 3134 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3135 } else { 3136 /* receive numeric values */ 3137 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3138 3139 /* receive message of values*/ 3140 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3141 3142 /* insert into matrix */ 3143 jj = rstart; 3144 smycols = mycols; 3145 svals = vals; 3146 for (i=0; i<m; i++) { 3147 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3148 smycols += ourlens[i]; 3149 svals += ourlens[i]; 3150 jj++; 3151 } 3152 } 3153 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3154 ierr = PetscFree(vals);CHKERRQ(ierr); 3155 ierr = PetscFree(mycols);CHKERRQ(ierr); 3156 ierr = PetscFree(rowners);CHKERRQ(ierr); 3157 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3158 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3159 PetscFunctionReturn(0); 3160 } 3161 3162 /* Not scalable because of ISAllGather() unless getting all columns. */ 3163 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3164 { 3165 PetscErrorCode ierr; 3166 IS iscol_local; 3167 PetscBool isstride; 3168 PetscMPIInt lisstride=0,gisstride; 3169 3170 PetscFunctionBegin; 3171 /* check if we are grabbing all columns*/ 3172 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3173 3174 if (isstride) { 3175 PetscInt start,len,mstart,mlen; 3176 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3177 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3178 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3179 if (mstart == start && mlen-mstart == len) lisstride = 1; 3180 } 3181 3182 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3183 if (gisstride) { 3184 PetscInt N; 3185 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3186 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3187 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3188 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3189 } else { 3190 PetscInt cbs; 3191 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3192 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3193 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3194 } 3195 3196 *isseq = iscol_local; 3197 PetscFunctionReturn(0); 3198 } 3199 3200 /* 3201 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3202 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3203 3204 Input Parameters: 3205 mat - matrix 3206 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3207 i.e., mat->rstart <= isrow[i] < mat->rend 3208 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3209 i.e., mat->cstart <= iscol[i] < mat->cend 3210 Output Parameter: 3211 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3212 iscol_o - sequential column index set for retrieving mat->B 3213 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3214 */ 3215 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3216 { 3217 PetscErrorCode ierr; 3218 Vec x,cmap; 3219 const PetscInt *is_idx; 3220 PetscScalar *xarray,*cmaparray; 3221 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3222 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3223 Mat B=a->B; 3224 Vec lvec=a->lvec,lcmap; 3225 PetscInt i,cstart,cend,Bn=B->cmap->N; 3226 MPI_Comm comm; 3227 VecScatter Mvctx=a->Mvctx; 3228 3229 PetscFunctionBegin; 3230 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3231 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3232 3233 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3234 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3235 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3236 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3237 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3238 3239 /* Get start indices */ 3240 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3241 isstart -= ncols; 3242 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3243 3244 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3245 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3246 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3247 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3248 for (i=0; i<ncols; i++) { 3249 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3250 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3251 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3252 } 3253 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3254 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3255 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3256 3257 /* Get iscol_d */ 3258 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3259 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3260 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3261 3262 /* Get isrow_d */ 3263 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3264 rstart = mat->rmap->rstart; 3265 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3266 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3267 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3268 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3269 3270 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3271 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3272 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3273 3274 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3275 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3276 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3277 3278 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3279 3280 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3281 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3282 3283 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3284 /* off-process column indices */ 3285 count = 0; 3286 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3287 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3288 3289 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3290 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3291 for (i=0; i<Bn; i++) { 3292 if (PetscRealPart(xarray[i]) > -1.0) { 3293 idx[count] = i; /* local column index in off-diagonal part B */ 3294 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3295 count++; 3296 } 3297 } 3298 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3299 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3300 3301 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3302 /* cannot ensure iscol_o has same blocksize as iscol! */ 3303 3304 ierr = PetscFree(idx);CHKERRQ(ierr); 3305 *garray = cmap1; 3306 3307 ierr = VecDestroy(&x);CHKERRQ(ierr); 3308 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3309 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3310 PetscFunctionReturn(0); 3311 } 3312 3313 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3314 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3315 { 3316 PetscErrorCode ierr; 3317 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3318 Mat M = NULL; 3319 MPI_Comm comm; 3320 IS iscol_d,isrow_d,iscol_o; 3321 Mat Asub = NULL,Bsub = NULL; 3322 PetscInt n; 3323 3324 PetscFunctionBegin; 3325 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3326 3327 if (call == MAT_REUSE_MATRIX) { 3328 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3329 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3330 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3331 3332 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3333 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3334 3335 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3336 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3337 3338 /* Update diagonal and off-diagonal portions of submat */ 3339 asub = (Mat_MPIAIJ*)(*submat)->data; 3340 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3341 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3342 if (n) { 3343 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3344 } 3345 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3346 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3347 3348 } else { /* call == MAT_INITIAL_MATRIX) */ 3349 const PetscInt *garray; 3350 PetscInt BsubN; 3351 3352 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3353 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3354 3355 /* Create local submatrices Asub and Bsub */ 3356 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3357 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3358 3359 /* Create submatrix M */ 3360 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3361 3362 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3363 asub = (Mat_MPIAIJ*)M->data; 3364 3365 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3366 n = asub->B->cmap->N; 3367 if (BsubN > n) { 3368 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3369 const PetscInt *idx; 3370 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3371 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3372 3373 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3374 j = 0; 3375 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3376 for (i=0; i<n; i++) { 3377 if (j >= BsubN) break; 3378 while (subgarray[i] > garray[j]) j++; 3379 3380 if (subgarray[i] == garray[j]) { 3381 idx_new[i] = idx[j++]; 3382 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3383 } 3384 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3385 3386 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3387 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3388 3389 } else if (BsubN < n) { 3390 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3391 } 3392 3393 ierr = PetscFree(garray);CHKERRQ(ierr); 3394 *submat = M; 3395 3396 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3397 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3398 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3399 3400 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3401 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3402 3403 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3404 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3405 } 3406 PetscFunctionReturn(0); 3407 } 3408 3409 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3410 { 3411 PetscErrorCode ierr; 3412 IS iscol_local=NULL,isrow_d; 3413 PetscInt csize; 3414 PetscInt n,i,j,start,end; 3415 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3416 MPI_Comm comm; 3417 3418 PetscFunctionBegin; 3419 /* If isrow has same processor distribution as mat, 3420 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3421 if (call == MAT_REUSE_MATRIX) { 3422 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3423 if (isrow_d) { 3424 sameRowDist = PETSC_TRUE; 3425 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3426 } else { 3427 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3428 if (iscol_local) { 3429 sameRowDist = PETSC_TRUE; 3430 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3431 } 3432 } 3433 } else { 3434 /* Check if isrow has same processor distribution as mat */ 3435 sameDist[0] = PETSC_FALSE; 3436 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3437 if (!n) { 3438 sameDist[0] = PETSC_TRUE; 3439 } else { 3440 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3441 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3442 if (i >= start && j < end) { 3443 sameDist[0] = PETSC_TRUE; 3444 } 3445 } 3446 3447 /* Check if iscol has same processor distribution as mat */ 3448 sameDist[1] = PETSC_FALSE; 3449 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3450 if (!n) { 3451 sameDist[1] = PETSC_TRUE; 3452 } else { 3453 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3454 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3455 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3456 } 3457 3458 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3459 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3460 sameRowDist = tsameDist[0]; 3461 } 3462 3463 if (sameRowDist) { 3464 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3465 /* isrow and iscol have same processor distribution as mat */ 3466 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3467 PetscFunctionReturn(0); 3468 } else { /* sameRowDist */ 3469 /* isrow has same processor distribution as mat */ 3470 if (call == MAT_INITIAL_MATRIX) { 3471 PetscBool sorted; 3472 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3473 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3474 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3475 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3476 3477 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3478 if (sorted) { 3479 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3480 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3481 PetscFunctionReturn(0); 3482 } 3483 } else { /* call == MAT_REUSE_MATRIX */ 3484 IS iscol_sub; 3485 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3486 if (iscol_sub) { 3487 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } 3490 } 3491 } 3492 } 3493 3494 /* General case: iscol -> iscol_local which has global size of iscol */ 3495 if (call == MAT_REUSE_MATRIX) { 3496 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3497 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3498 } else { 3499 if (!iscol_local) { 3500 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3501 } 3502 } 3503 3504 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3505 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3506 3507 if (call == MAT_INITIAL_MATRIX) { 3508 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3509 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3510 } 3511 PetscFunctionReturn(0); 3512 } 3513 3514 /*@C 3515 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3516 and "off-diagonal" part of the matrix in CSR format. 3517 3518 Collective 3519 3520 Input Parameters: 3521 + comm - MPI communicator 3522 . A - "diagonal" portion of matrix 3523 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3524 - garray - global index of B columns 3525 3526 Output Parameter: 3527 . mat - the matrix, with input A as its local diagonal matrix 3528 Level: advanced 3529 3530 Notes: 3531 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3532 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3533 3534 .seealso: MatCreateMPIAIJWithSplitArrays() 3535 @*/ 3536 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3537 { 3538 PetscErrorCode ierr; 3539 Mat_MPIAIJ *maij; 3540 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3541 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3542 PetscScalar *oa=b->a; 3543 Mat Bnew; 3544 PetscInt m,n,N; 3545 3546 PetscFunctionBegin; 3547 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3548 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3549 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3550 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3551 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3552 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3553 3554 /* Get global columns of mat */ 3555 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3556 3557 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3558 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3559 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3560 maij = (Mat_MPIAIJ*)(*mat)->data; 3561 3562 (*mat)->preallocated = PETSC_TRUE; 3563 3564 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3565 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3566 3567 /* Set A as diagonal portion of *mat */ 3568 maij->A = A; 3569 3570 nz = oi[m]; 3571 for (i=0; i<nz; i++) { 3572 col = oj[i]; 3573 oj[i] = garray[col]; 3574 } 3575 3576 /* Set Bnew as off-diagonal portion of *mat */ 3577 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3578 bnew = (Mat_SeqAIJ*)Bnew->data; 3579 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3580 maij->B = Bnew; 3581 3582 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3583 3584 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3585 b->free_a = PETSC_FALSE; 3586 b->free_ij = PETSC_FALSE; 3587 ierr = MatDestroy(&B);CHKERRQ(ierr); 3588 3589 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3590 bnew->free_a = PETSC_TRUE; 3591 bnew->free_ij = PETSC_TRUE; 3592 3593 /* condense columns of maij->B */ 3594 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3595 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3596 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3597 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3598 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3599 PetscFunctionReturn(0); 3600 } 3601 3602 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3603 3604 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3605 { 3606 PetscErrorCode ierr; 3607 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3608 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3609 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3610 Mat M,Msub,B=a->B; 3611 MatScalar *aa; 3612 Mat_SeqAIJ *aij; 3613 PetscInt *garray = a->garray,*colsub,Ncols; 3614 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3615 IS iscol_sub,iscmap; 3616 const PetscInt *is_idx,*cmap; 3617 PetscBool allcolumns=PETSC_FALSE; 3618 MPI_Comm comm; 3619 3620 PetscFunctionBegin; 3621 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3622 3623 if (call == MAT_REUSE_MATRIX) { 3624 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3625 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3626 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3627 3628 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3629 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3630 3631 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3632 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3633 3634 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3635 3636 } else { /* call == MAT_INITIAL_MATRIX) */ 3637 PetscBool flg; 3638 3639 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3640 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3641 3642 /* (1) iscol -> nonscalable iscol_local */ 3643 /* Check for special case: each processor gets entire matrix columns */ 3644 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3645 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3646 if (allcolumns) { 3647 iscol_sub = iscol_local; 3648 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3649 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3650 3651 } else { 3652 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3653 PetscInt *idx,*cmap1,k; 3654 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3655 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3656 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3657 count = 0; 3658 k = 0; 3659 for (i=0; i<Ncols; i++) { 3660 j = is_idx[i]; 3661 if (j >= cstart && j < cend) { 3662 /* diagonal part of mat */ 3663 idx[count] = j; 3664 cmap1[count++] = i; /* column index in submat */ 3665 } else if (Bn) { 3666 /* off-diagonal part of mat */ 3667 if (j == garray[k]) { 3668 idx[count] = j; 3669 cmap1[count++] = i; /* column index in submat */ 3670 } else if (j > garray[k]) { 3671 while (j > garray[k] && k < Bn-1) k++; 3672 if (j == garray[k]) { 3673 idx[count] = j; 3674 cmap1[count++] = i; /* column index in submat */ 3675 } 3676 } 3677 } 3678 } 3679 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3680 3681 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3682 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3683 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3684 3685 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3686 } 3687 3688 /* (3) Create sequential Msub */ 3689 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3690 } 3691 3692 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3693 aij = (Mat_SeqAIJ*)(Msub)->data; 3694 ii = aij->i; 3695 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3696 3697 /* 3698 m - number of local rows 3699 Ncols - number of columns (same on all processors) 3700 rstart - first row in new global matrix generated 3701 */ 3702 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3703 3704 if (call == MAT_INITIAL_MATRIX) { 3705 /* (4) Create parallel newmat */ 3706 PetscMPIInt rank,size; 3707 PetscInt csize; 3708 3709 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3710 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3711 3712 /* 3713 Determine the number of non-zeros in the diagonal and off-diagonal 3714 portions of the matrix in order to do correct preallocation 3715 */ 3716 3717 /* first get start and end of "diagonal" columns */ 3718 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3719 if (csize == PETSC_DECIDE) { 3720 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3721 if (mglobal == Ncols) { /* square matrix */ 3722 nlocal = m; 3723 } else { 3724 nlocal = Ncols/size + ((Ncols % size) > rank); 3725 } 3726 } else { 3727 nlocal = csize; 3728 } 3729 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3730 rstart = rend - nlocal; 3731 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3732 3733 /* next, compute all the lengths */ 3734 jj = aij->j; 3735 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3736 olens = dlens + m; 3737 for (i=0; i<m; i++) { 3738 jend = ii[i+1] - ii[i]; 3739 olen = 0; 3740 dlen = 0; 3741 for (j=0; j<jend; j++) { 3742 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3743 else dlen++; 3744 jj++; 3745 } 3746 olens[i] = olen; 3747 dlens[i] = dlen; 3748 } 3749 3750 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3751 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3752 3753 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3754 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3755 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3756 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3757 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3758 ierr = PetscFree(dlens);CHKERRQ(ierr); 3759 3760 } else { /* call == MAT_REUSE_MATRIX */ 3761 M = *newmat; 3762 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3763 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3764 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3765 /* 3766 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3767 rather than the slower MatSetValues(). 3768 */ 3769 M->was_assembled = PETSC_TRUE; 3770 M->assembled = PETSC_FALSE; 3771 } 3772 3773 /* (5) Set values of Msub to *newmat */ 3774 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3775 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3776 3777 jj = aij->j; 3778 aa = aij->a; 3779 for (i=0; i<m; i++) { 3780 row = rstart + i; 3781 nz = ii[i+1] - ii[i]; 3782 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3783 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3784 jj += nz; aa += nz; 3785 } 3786 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3787 3788 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3789 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3790 3791 ierr = PetscFree(colsub);CHKERRQ(ierr); 3792 3793 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3794 if (call == MAT_INITIAL_MATRIX) { 3795 *newmat = M; 3796 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3797 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3798 3799 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3800 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3801 3802 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3803 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3804 3805 if (iscol_local) { 3806 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3807 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3808 } 3809 } 3810 PetscFunctionReturn(0); 3811 } 3812 3813 /* 3814 Not great since it makes two copies of the submatrix, first an SeqAIJ 3815 in local and then by concatenating the local matrices the end result. 3816 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3817 3818 Note: This requires a sequential iscol with all indices. 3819 */ 3820 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3821 { 3822 PetscErrorCode ierr; 3823 PetscMPIInt rank,size; 3824 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3825 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3826 Mat M,Mreuse; 3827 MatScalar *aa,*vwork; 3828 MPI_Comm comm; 3829 Mat_SeqAIJ *aij; 3830 PetscBool colflag,allcolumns=PETSC_FALSE; 3831 3832 PetscFunctionBegin; 3833 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3834 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3835 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3836 3837 /* Check for special case: each processor gets entire matrix columns */ 3838 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3839 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3840 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3841 3842 if (call == MAT_REUSE_MATRIX) { 3843 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3844 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3845 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3846 } else { 3847 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3848 } 3849 3850 /* 3851 m - number of local rows 3852 n - number of columns (same on all processors) 3853 rstart - first row in new global matrix generated 3854 */ 3855 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3856 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3857 if (call == MAT_INITIAL_MATRIX) { 3858 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3859 ii = aij->i; 3860 jj = aij->j; 3861 3862 /* 3863 Determine the number of non-zeros in the diagonal and off-diagonal 3864 portions of the matrix in order to do correct preallocation 3865 */ 3866 3867 /* first get start and end of "diagonal" columns */ 3868 if (csize == PETSC_DECIDE) { 3869 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3870 if (mglobal == n) { /* square matrix */ 3871 nlocal = m; 3872 } else { 3873 nlocal = n/size + ((n % size) > rank); 3874 } 3875 } else { 3876 nlocal = csize; 3877 } 3878 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3879 rstart = rend - nlocal; 3880 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3881 3882 /* next, compute all the lengths */ 3883 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3884 olens = dlens + m; 3885 for (i=0; i<m; i++) { 3886 jend = ii[i+1] - ii[i]; 3887 olen = 0; 3888 dlen = 0; 3889 for (j=0; j<jend; j++) { 3890 if (*jj < rstart || *jj >= rend) olen++; 3891 else dlen++; 3892 jj++; 3893 } 3894 olens[i] = olen; 3895 dlens[i] = dlen; 3896 } 3897 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3898 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3899 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3900 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3901 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3902 ierr = PetscFree(dlens);CHKERRQ(ierr); 3903 } else { 3904 PetscInt ml,nl; 3905 3906 M = *newmat; 3907 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3908 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3909 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3910 /* 3911 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3912 rather than the slower MatSetValues(). 3913 */ 3914 M->was_assembled = PETSC_TRUE; 3915 M->assembled = PETSC_FALSE; 3916 } 3917 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3918 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3919 ii = aij->i; 3920 jj = aij->j; 3921 aa = aij->a; 3922 for (i=0; i<m; i++) { 3923 row = rstart + i; 3924 nz = ii[i+1] - ii[i]; 3925 cwork = jj; jj += nz; 3926 vwork = aa; aa += nz; 3927 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3928 } 3929 3930 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3931 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3932 *newmat = M; 3933 3934 /* save submatrix used in processor for next request */ 3935 if (call == MAT_INITIAL_MATRIX) { 3936 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3937 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3938 } 3939 PetscFunctionReturn(0); 3940 } 3941 3942 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3943 { 3944 PetscInt m,cstart, cend,j,nnz,i,d; 3945 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3946 const PetscInt *JJ; 3947 PetscErrorCode ierr; 3948 PetscBool nooffprocentries; 3949 3950 PetscFunctionBegin; 3951 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3952 3953 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3954 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3955 m = B->rmap->n; 3956 cstart = B->cmap->rstart; 3957 cend = B->cmap->rend; 3958 rstart = B->rmap->rstart; 3959 3960 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3961 3962 #if defined(PETSC_USE_DEBUG) 3963 for (i=0; i<m; i++) { 3964 nnz = Ii[i+1]- Ii[i]; 3965 JJ = J + Ii[i]; 3966 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3967 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3968 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3969 } 3970 #endif 3971 3972 for (i=0; i<m; i++) { 3973 nnz = Ii[i+1]- Ii[i]; 3974 JJ = J + Ii[i]; 3975 nnz_max = PetscMax(nnz_max,nnz); 3976 d = 0; 3977 for (j=0; j<nnz; j++) { 3978 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3979 } 3980 d_nnz[i] = d; 3981 o_nnz[i] = nnz - d; 3982 } 3983 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3984 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3985 3986 for (i=0; i<m; i++) { 3987 ii = i + rstart; 3988 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3989 } 3990 nooffprocentries = B->nooffprocentries; 3991 B->nooffprocentries = PETSC_TRUE; 3992 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3993 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3994 B->nooffprocentries = nooffprocentries; 3995 3996 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3997 PetscFunctionReturn(0); 3998 } 3999 4000 /*@ 4001 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 4002 (the default parallel PETSc format). 4003 4004 Collective 4005 4006 Input Parameters: 4007 + B - the matrix 4008 . i - the indices into j for the start of each local row (starts with zero) 4009 . j - the column indices for each local row (starts with zero) 4010 - v - optional values in the matrix 4011 4012 Level: developer 4013 4014 Notes: 4015 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 4016 thus you CANNOT change the matrix entries by changing the values of v[] after you have 4017 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4018 4019 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4020 4021 The format which is used for the sparse matrix input, is equivalent to a 4022 row-major ordering.. i.e for the following matrix, the input data expected is 4023 as shown 4024 4025 $ 1 0 0 4026 $ 2 0 3 P0 4027 $ ------- 4028 $ 4 5 6 P1 4029 $ 4030 $ Process0 [P0]: rows_owned=[0,1] 4031 $ i = {0,1,3} [size = nrow+1 = 2+1] 4032 $ j = {0,0,2} [size = 3] 4033 $ v = {1,2,3} [size = 3] 4034 $ 4035 $ Process1 [P1]: rows_owned=[2] 4036 $ i = {0,3} [size = nrow+1 = 1+1] 4037 $ j = {0,1,2} [size = 3] 4038 $ v = {4,5,6} [size = 3] 4039 4040 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4041 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4042 @*/ 4043 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4044 { 4045 PetscErrorCode ierr; 4046 4047 PetscFunctionBegin; 4048 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4049 PetscFunctionReturn(0); 4050 } 4051 4052 /*@C 4053 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4054 (the default parallel PETSc format). For good matrix assembly performance 4055 the user should preallocate the matrix storage by setting the parameters 4056 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4057 performance can be increased by more than a factor of 50. 4058 4059 Collective 4060 4061 Input Parameters: 4062 + B - the matrix 4063 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4064 (same value is used for all local rows) 4065 . d_nnz - array containing the number of nonzeros in the various rows of the 4066 DIAGONAL portion of the local submatrix (possibly different for each row) 4067 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4068 The size of this array is equal to the number of local rows, i.e 'm'. 4069 For matrices that will be factored, you must leave room for (and set) 4070 the diagonal entry even if it is zero. 4071 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4072 submatrix (same value is used for all local rows). 4073 - o_nnz - array containing the number of nonzeros in the various rows of the 4074 OFF-DIAGONAL portion of the local submatrix (possibly different for 4075 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4076 structure. The size of this array is equal to the number 4077 of local rows, i.e 'm'. 4078 4079 If the *_nnz parameter is given then the *_nz parameter is ignored 4080 4081 The AIJ format (also called the Yale sparse matrix format or 4082 compressed row storage (CSR)), is fully compatible with standard Fortran 77 4083 storage. The stored row and column indices begin with zero. 4084 See Users-Manual: ch_mat for details. 4085 4086 The parallel matrix is partitioned such that the first m0 rows belong to 4087 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4088 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4089 4090 The DIAGONAL portion of the local submatrix of a processor can be defined 4091 as the submatrix which is obtained by extraction the part corresponding to 4092 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4093 first row that belongs to the processor, r2 is the last row belonging to 4094 the this processor, and c1-c2 is range of indices of the local part of a 4095 vector suitable for applying the matrix to. This is an mxn matrix. In the 4096 common case of a square matrix, the row and column ranges are the same and 4097 the DIAGONAL part is also square. The remaining portion of the local 4098 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4099 4100 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4101 4102 You can call MatGetInfo() to get information on how effective the preallocation was; 4103 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4104 You can also run with the option -info and look for messages with the string 4105 malloc in them to see if additional memory allocation was needed. 4106 4107 Example usage: 4108 4109 Consider the following 8x8 matrix with 34 non-zero values, that is 4110 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4111 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4112 as follows: 4113 4114 .vb 4115 1 2 0 | 0 3 0 | 0 4 4116 Proc0 0 5 6 | 7 0 0 | 8 0 4117 9 0 10 | 11 0 0 | 12 0 4118 ------------------------------------- 4119 13 0 14 | 15 16 17 | 0 0 4120 Proc1 0 18 0 | 19 20 21 | 0 0 4121 0 0 0 | 22 23 0 | 24 0 4122 ------------------------------------- 4123 Proc2 25 26 27 | 0 0 28 | 29 0 4124 30 0 0 | 31 32 33 | 0 34 4125 .ve 4126 4127 This can be represented as a collection of submatrices as: 4128 4129 .vb 4130 A B C 4131 D E F 4132 G H I 4133 .ve 4134 4135 Where the submatrices A,B,C are owned by proc0, D,E,F are 4136 owned by proc1, G,H,I are owned by proc2. 4137 4138 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4139 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4140 The 'M','N' parameters are 8,8, and have the same values on all procs. 4141 4142 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4143 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4144 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4145 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4146 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4147 matrix, ans [DF] as another SeqAIJ matrix. 4148 4149 When d_nz, o_nz parameters are specified, d_nz storage elements are 4150 allocated for every row of the local diagonal submatrix, and o_nz 4151 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4152 One way to choose d_nz and o_nz is to use the max nonzerors per local 4153 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4154 In this case, the values of d_nz,o_nz are: 4155 .vb 4156 proc0 : dnz = 2, o_nz = 2 4157 proc1 : dnz = 3, o_nz = 2 4158 proc2 : dnz = 1, o_nz = 4 4159 .ve 4160 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4161 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4162 for proc3. i.e we are using 12+15+10=37 storage locations to store 4163 34 values. 4164 4165 When d_nnz, o_nnz parameters are specified, the storage is specified 4166 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4167 In the above case the values for d_nnz,o_nnz are: 4168 .vb 4169 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4170 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4171 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4172 .ve 4173 Here the space allocated is sum of all the above values i.e 34, and 4174 hence pre-allocation is perfect. 4175 4176 Level: intermediate 4177 4178 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4179 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4180 @*/ 4181 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4182 { 4183 PetscErrorCode ierr; 4184 4185 PetscFunctionBegin; 4186 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4187 PetscValidType(B,1); 4188 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4189 PetscFunctionReturn(0); 4190 } 4191 4192 /*@ 4193 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 4194 CSR format for the local rows. 4195 4196 Collective 4197 4198 Input Parameters: 4199 + comm - MPI communicator 4200 . m - number of local rows (Cannot be PETSC_DECIDE) 4201 . n - This value should be the same as the local size used in creating the 4202 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4203 calculated if N is given) For square matrices n is almost always m. 4204 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4205 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4206 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4207 . j - column indices 4208 - a - matrix values 4209 4210 Output Parameter: 4211 . mat - the matrix 4212 4213 Level: intermediate 4214 4215 Notes: 4216 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4217 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4218 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4219 4220 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4221 4222 The format which is used for the sparse matrix input, is equivalent to a 4223 row-major ordering.. i.e for the following matrix, the input data expected is 4224 as shown 4225 4226 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4227 4228 $ 1 0 0 4229 $ 2 0 3 P0 4230 $ ------- 4231 $ 4 5 6 P1 4232 $ 4233 $ Process0 [P0]: rows_owned=[0,1] 4234 $ i = {0,1,3} [size = nrow+1 = 2+1] 4235 $ j = {0,0,2} [size = 3] 4236 $ v = {1,2,3} [size = 3] 4237 $ 4238 $ Process1 [P1]: rows_owned=[2] 4239 $ i = {0,3} [size = nrow+1 = 1+1] 4240 $ j = {0,1,2} [size = 3] 4241 $ v = {4,5,6} [size = 3] 4242 4243 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4244 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4245 @*/ 4246 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4247 { 4248 PetscErrorCode ierr; 4249 4250 PetscFunctionBegin; 4251 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4252 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4253 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4254 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4255 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4256 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4257 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4258 PetscFunctionReturn(0); 4259 } 4260 4261 /*@ 4262 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4263 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4264 4265 Collective 4266 4267 Input Parameters: 4268 + mat - the matrix 4269 . m - number of local rows (Cannot be PETSC_DECIDE) 4270 . n - This value should be the same as the local size used in creating the 4271 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4272 calculated if N is given) For square matrices n is almost always m. 4273 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4274 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4275 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4276 . J - column indices 4277 - v - matrix values 4278 4279 Level: intermediate 4280 4281 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4282 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4283 @*/ 4284 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4285 { 4286 PetscErrorCode ierr; 4287 PetscInt cstart,nnz,i,j; 4288 PetscInt *ld; 4289 PetscBool nooffprocentries; 4290 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4291 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4292 PetscScalar *ad = Ad->a, *ao = Ao->a; 4293 const PetscInt *Adi = Ad->i; 4294 PetscInt ldi,Iii,md; 4295 4296 PetscFunctionBegin; 4297 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4298 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4299 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4300 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4301 4302 cstart = mat->cmap->rstart; 4303 if (!Aij->ld) { 4304 /* count number of entries below block diagonal */ 4305 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4306 Aij->ld = ld; 4307 for (i=0; i<m; i++) { 4308 nnz = Ii[i+1]- Ii[i]; 4309 j = 0; 4310 while (J[j] < cstart && j < nnz) {j++;} 4311 J += nnz; 4312 ld[i] = j; 4313 } 4314 } else { 4315 ld = Aij->ld; 4316 } 4317 4318 for (i=0; i<m; i++) { 4319 nnz = Ii[i+1]- Ii[i]; 4320 Iii = Ii[i]; 4321 ldi = ld[i]; 4322 md = Adi[i+1]-Adi[i]; 4323 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4324 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4325 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4326 ad += md; 4327 ao += nnz - md; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4332 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4333 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4334 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4335 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4336 mat->nooffprocentries = nooffprocentries; 4337 PetscFunctionReturn(0); 4338 } 4339 4340 /*@C 4341 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4342 (the default parallel PETSc format). For good matrix assembly performance 4343 the user should preallocate the matrix storage by setting the parameters 4344 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4345 performance can be increased by more than a factor of 50. 4346 4347 Collective 4348 4349 Input Parameters: 4350 + comm - MPI communicator 4351 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4352 This value should be the same as the local size used in creating the 4353 y vector for the matrix-vector product y = Ax. 4354 . n - This value should be the same as the local size used in creating the 4355 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4356 calculated if N is given) For square matrices n is almost always m. 4357 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4358 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4359 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4360 (same value is used for all local rows) 4361 . d_nnz - array containing the number of nonzeros in the various rows of the 4362 DIAGONAL portion of the local submatrix (possibly different for each row) 4363 or NULL, if d_nz is used to specify the nonzero structure. 4364 The size of this array is equal to the number of local rows, i.e 'm'. 4365 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4366 submatrix (same value is used for all local rows). 4367 - o_nnz - array containing the number of nonzeros in the various rows of the 4368 OFF-DIAGONAL portion of the local submatrix (possibly different for 4369 each row) or NULL, if o_nz is used to specify the nonzero 4370 structure. The size of this array is equal to the number 4371 of local rows, i.e 'm'. 4372 4373 Output Parameter: 4374 . A - the matrix 4375 4376 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4377 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4378 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4379 4380 Notes: 4381 If the *_nnz parameter is given then the *_nz parameter is ignored 4382 4383 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4384 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4385 storage requirements for this matrix. 4386 4387 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4388 processor than it must be used on all processors that share the object for 4389 that argument. 4390 4391 The user MUST specify either the local or global matrix dimensions 4392 (possibly both). 4393 4394 The parallel matrix is partitioned across processors such that the 4395 first m0 rows belong to process 0, the next m1 rows belong to 4396 process 1, the next m2 rows belong to process 2 etc.. where 4397 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4398 values corresponding to [m x N] submatrix. 4399 4400 The columns are logically partitioned with the n0 columns belonging 4401 to 0th partition, the next n1 columns belonging to the next 4402 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4403 4404 The DIAGONAL portion of the local submatrix on any given processor 4405 is the submatrix corresponding to the rows and columns m,n 4406 corresponding to the given processor. i.e diagonal matrix on 4407 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4408 etc. The remaining portion of the local submatrix [m x (N-n)] 4409 constitute the OFF-DIAGONAL portion. The example below better 4410 illustrates this concept. 4411 4412 For a square global matrix we define each processor's diagonal portion 4413 to be its local rows and the corresponding columns (a square submatrix); 4414 each processor's off-diagonal portion encompasses the remainder of the 4415 local matrix (a rectangular submatrix). 4416 4417 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4418 4419 When calling this routine with a single process communicator, a matrix of 4420 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4421 type of communicator, use the construction mechanism 4422 .vb 4423 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4424 .ve 4425 4426 $ MatCreate(...,&A); 4427 $ MatSetType(A,MATMPIAIJ); 4428 $ MatSetSizes(A, m,n,M,N); 4429 $ MatMPIAIJSetPreallocation(A,...); 4430 4431 By default, this format uses inodes (identical nodes) when possible. 4432 We search for consecutive rows with the same nonzero structure, thereby 4433 reusing matrix information to achieve increased efficiency. 4434 4435 Options Database Keys: 4436 + -mat_no_inode - Do not use inodes 4437 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4438 4439 4440 4441 Example usage: 4442 4443 Consider the following 8x8 matrix with 34 non-zero values, that is 4444 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4445 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4446 as follows 4447 4448 .vb 4449 1 2 0 | 0 3 0 | 0 4 4450 Proc0 0 5 6 | 7 0 0 | 8 0 4451 9 0 10 | 11 0 0 | 12 0 4452 ------------------------------------- 4453 13 0 14 | 15 16 17 | 0 0 4454 Proc1 0 18 0 | 19 20 21 | 0 0 4455 0 0 0 | 22 23 0 | 24 0 4456 ------------------------------------- 4457 Proc2 25 26 27 | 0 0 28 | 29 0 4458 30 0 0 | 31 32 33 | 0 34 4459 .ve 4460 4461 This can be represented as a collection of submatrices as 4462 4463 .vb 4464 A B C 4465 D E F 4466 G H I 4467 .ve 4468 4469 Where the submatrices A,B,C are owned by proc0, D,E,F are 4470 owned by proc1, G,H,I are owned by proc2. 4471 4472 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4473 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4474 The 'M','N' parameters are 8,8, and have the same values on all procs. 4475 4476 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4477 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4478 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4479 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4480 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 4481 matrix, ans [DF] as another SeqAIJ matrix. 4482 4483 When d_nz, o_nz parameters are specified, d_nz storage elements are 4484 allocated for every row of the local diagonal submatrix, and o_nz 4485 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4486 One way to choose d_nz and o_nz is to use the max nonzerors per local 4487 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4488 In this case, the values of d_nz,o_nz are 4489 .vb 4490 proc0 : dnz = 2, o_nz = 2 4491 proc1 : dnz = 3, o_nz = 2 4492 proc2 : dnz = 1, o_nz = 4 4493 .ve 4494 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4495 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4496 for proc3. i.e we are using 12+15+10=37 storage locations to store 4497 34 values. 4498 4499 When d_nnz, o_nnz parameters are specified, the storage is specified 4500 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4501 In the above case the values for d_nnz,o_nnz are 4502 .vb 4503 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4504 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4505 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4506 .ve 4507 Here the space allocated is sum of all the above values i.e 34, and 4508 hence pre-allocation is perfect. 4509 4510 Level: intermediate 4511 4512 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4513 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4514 @*/ 4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4516 { 4517 PetscErrorCode ierr; 4518 PetscMPIInt size; 4519 4520 PetscFunctionBegin; 4521 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4522 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4523 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4524 if (size > 1) { 4525 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4526 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4527 } else { 4528 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4529 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4530 } 4531 PetscFunctionReturn(0); 4532 } 4533 4534 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4535 { 4536 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4537 PetscBool flg; 4538 PetscErrorCode ierr; 4539 4540 PetscFunctionBegin; 4541 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4542 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4543 if (Ad) *Ad = a->A; 4544 if (Ao) *Ao = a->B; 4545 if (colmap) *colmap = a->garray; 4546 PetscFunctionReturn(0); 4547 } 4548 4549 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4550 { 4551 PetscErrorCode ierr; 4552 PetscInt m,N,i,rstart,nnz,Ii; 4553 PetscInt *indx; 4554 PetscScalar *values; 4555 4556 PetscFunctionBegin; 4557 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4558 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4559 PetscInt *dnz,*onz,sum,bs,cbs; 4560 4561 if (n == PETSC_DECIDE) { 4562 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4563 } 4564 /* Check sum(n) = N */ 4565 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4566 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4567 4568 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4569 rstart -= m; 4570 4571 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4572 for (i=0; i<m; i++) { 4573 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4574 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4575 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4576 } 4577 4578 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4579 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4580 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4581 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4582 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4583 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4584 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4585 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4586 } 4587 4588 /* numeric phase */ 4589 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4590 for (i=0; i<m; i++) { 4591 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4592 Ii = i + rstart; 4593 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4594 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4595 } 4596 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4597 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4598 PetscFunctionReturn(0); 4599 } 4600 4601 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4602 { 4603 PetscErrorCode ierr; 4604 PetscMPIInt rank; 4605 PetscInt m,N,i,rstart,nnz; 4606 size_t len; 4607 const PetscInt *indx; 4608 PetscViewer out; 4609 char *name; 4610 Mat B; 4611 const PetscScalar *values; 4612 4613 PetscFunctionBegin; 4614 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4615 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4616 /* Should this be the type of the diagonal block of A? */ 4617 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4618 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4619 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4620 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4621 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4622 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4623 for (i=0; i<m; i++) { 4624 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4625 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4626 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4627 } 4628 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4629 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4630 4631 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4632 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4633 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4634 sprintf(name,"%s.%d",outfile,rank); 4635 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4636 ierr = PetscFree(name);CHKERRQ(ierr); 4637 ierr = MatView(B,out);CHKERRQ(ierr); 4638 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4639 ierr = MatDestroy(&B);CHKERRQ(ierr); 4640 PetscFunctionReturn(0); 4641 } 4642 4643 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4644 { 4645 PetscErrorCode ierr; 4646 Mat_Merge_SeqsToMPI *merge; 4647 PetscContainer container; 4648 4649 PetscFunctionBegin; 4650 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4651 if (container) { 4652 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4653 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4654 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4655 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4656 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4657 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4658 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4659 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4660 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4661 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4662 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4663 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4664 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4665 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4666 ierr = PetscFree(merge);CHKERRQ(ierr); 4667 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4668 } 4669 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4670 PetscFunctionReturn(0); 4671 } 4672 4673 #include <../src/mat/utils/freespace.h> 4674 #include <petscbt.h> 4675 4676 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4677 { 4678 PetscErrorCode ierr; 4679 MPI_Comm comm; 4680 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4681 PetscMPIInt size,rank,taga,*len_s; 4682 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4683 PetscInt proc,m; 4684 PetscInt **buf_ri,**buf_rj; 4685 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4686 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4687 MPI_Request *s_waits,*r_waits; 4688 MPI_Status *status; 4689 MatScalar *aa=a->a; 4690 MatScalar **abuf_r,*ba_i; 4691 Mat_Merge_SeqsToMPI *merge; 4692 PetscContainer container; 4693 4694 PetscFunctionBegin; 4695 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4696 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4697 4698 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4699 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4700 4701 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4702 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4703 4704 bi = merge->bi; 4705 bj = merge->bj; 4706 buf_ri = merge->buf_ri; 4707 buf_rj = merge->buf_rj; 4708 4709 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4710 owners = merge->rowmap->range; 4711 len_s = merge->len_s; 4712 4713 /* send and recv matrix values */ 4714 /*-----------------------------*/ 4715 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4716 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4717 4718 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4719 for (proc=0,k=0; proc<size; proc++) { 4720 if (!len_s[proc]) continue; 4721 i = owners[proc]; 4722 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4723 k++; 4724 } 4725 4726 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4727 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4728 ierr = PetscFree(status);CHKERRQ(ierr); 4729 4730 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4731 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4732 4733 /* insert mat values of mpimat */ 4734 /*----------------------------*/ 4735 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4736 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4737 4738 for (k=0; k<merge->nrecv; k++) { 4739 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4740 nrows = *(buf_ri_k[k]); 4741 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4742 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4743 } 4744 4745 /* set values of ba */ 4746 m = merge->rowmap->n; 4747 for (i=0; i<m; i++) { 4748 arow = owners[rank] + i; 4749 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4750 bnzi = bi[i+1] - bi[i]; 4751 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4752 4753 /* add local non-zero vals of this proc's seqmat into ba */ 4754 anzi = ai[arow+1] - ai[arow]; 4755 aj = a->j + ai[arow]; 4756 aa = a->a + ai[arow]; 4757 nextaj = 0; 4758 for (j=0; nextaj<anzi; j++) { 4759 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4760 ba_i[j] += aa[nextaj++]; 4761 } 4762 } 4763 4764 /* add received vals into ba */ 4765 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4766 /* i-th row */ 4767 if (i == *nextrow[k]) { 4768 anzi = *(nextai[k]+1) - *nextai[k]; 4769 aj = buf_rj[k] + *(nextai[k]); 4770 aa = abuf_r[k] + *(nextai[k]); 4771 nextaj = 0; 4772 for (j=0; nextaj<anzi; j++) { 4773 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4774 ba_i[j] += aa[nextaj++]; 4775 } 4776 } 4777 nextrow[k]++; nextai[k]++; 4778 } 4779 } 4780 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4781 } 4782 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4783 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4784 4785 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4786 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4787 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4788 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4789 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4790 PetscFunctionReturn(0); 4791 } 4792 4793 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4794 { 4795 PetscErrorCode ierr; 4796 Mat B_mpi; 4797 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4798 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4799 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4800 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4801 PetscInt len,proc,*dnz,*onz,bs,cbs; 4802 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4803 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4804 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4805 MPI_Status *status; 4806 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4807 PetscBT lnkbt; 4808 Mat_Merge_SeqsToMPI *merge; 4809 PetscContainer container; 4810 4811 PetscFunctionBegin; 4812 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4813 4814 /* make sure it is a PETSc comm */ 4815 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4816 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4817 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4818 4819 ierr = PetscNew(&merge);CHKERRQ(ierr); 4820 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4821 4822 /* determine row ownership */ 4823 /*---------------------------------------------------------*/ 4824 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4825 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4826 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4827 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4828 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4829 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4830 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4831 4832 m = merge->rowmap->n; 4833 owners = merge->rowmap->range; 4834 4835 /* determine the number of messages to send, their lengths */ 4836 /*---------------------------------------------------------*/ 4837 len_s = merge->len_s; 4838 4839 len = 0; /* length of buf_si[] */ 4840 merge->nsend = 0; 4841 for (proc=0; proc<size; proc++) { 4842 len_si[proc] = 0; 4843 if (proc == rank) { 4844 len_s[proc] = 0; 4845 } else { 4846 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4847 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4848 } 4849 if (len_s[proc]) { 4850 merge->nsend++; 4851 nrows = 0; 4852 for (i=owners[proc]; i<owners[proc+1]; i++) { 4853 if (ai[i+1] > ai[i]) nrows++; 4854 } 4855 len_si[proc] = 2*(nrows+1); 4856 len += len_si[proc]; 4857 } 4858 } 4859 4860 /* determine the number and length of messages to receive for ij-structure */ 4861 /*-------------------------------------------------------------------------*/ 4862 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4863 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4864 4865 /* post the Irecv of j-structure */ 4866 /*-------------------------------*/ 4867 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4868 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4869 4870 /* post the Isend of j-structure */ 4871 /*--------------------------------*/ 4872 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4873 4874 for (proc=0, k=0; proc<size; proc++) { 4875 if (!len_s[proc]) continue; 4876 i = owners[proc]; 4877 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4878 k++; 4879 } 4880 4881 /* receives and sends of j-structure are complete */ 4882 /*------------------------------------------------*/ 4883 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4884 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4885 4886 /* send and recv i-structure */ 4887 /*---------------------------*/ 4888 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4889 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4890 4891 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4892 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4893 for (proc=0,k=0; proc<size; proc++) { 4894 if (!len_s[proc]) continue; 4895 /* form outgoing message for i-structure: 4896 buf_si[0]: nrows to be sent 4897 [1:nrows]: row index (global) 4898 [nrows+1:2*nrows+1]: i-structure index 4899 */ 4900 /*-------------------------------------------*/ 4901 nrows = len_si[proc]/2 - 1; 4902 buf_si_i = buf_si + nrows+1; 4903 buf_si[0] = nrows; 4904 buf_si_i[0] = 0; 4905 nrows = 0; 4906 for (i=owners[proc]; i<owners[proc+1]; i++) { 4907 anzi = ai[i+1] - ai[i]; 4908 if (anzi) { 4909 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4910 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4911 nrows++; 4912 } 4913 } 4914 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4915 k++; 4916 buf_si += len_si[proc]; 4917 } 4918 4919 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4920 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4921 4922 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4923 for (i=0; i<merge->nrecv; i++) { 4924 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4925 } 4926 4927 ierr = PetscFree(len_si);CHKERRQ(ierr); 4928 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4929 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4930 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4931 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4932 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4933 ierr = PetscFree(status);CHKERRQ(ierr); 4934 4935 /* compute a local seq matrix in each processor */ 4936 /*----------------------------------------------*/ 4937 /* allocate bi array and free space for accumulating nonzero column info */ 4938 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4939 bi[0] = 0; 4940 4941 /* create and initialize a linked list */ 4942 nlnk = N+1; 4943 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4944 4945 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4946 len = ai[owners[rank+1]] - ai[owners[rank]]; 4947 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4948 4949 current_space = free_space; 4950 4951 /* determine symbolic info for each local row */ 4952 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4953 4954 for (k=0; k<merge->nrecv; k++) { 4955 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4956 nrows = *buf_ri_k[k]; 4957 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4958 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4959 } 4960 4961 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4962 len = 0; 4963 for (i=0; i<m; i++) { 4964 bnzi = 0; 4965 /* add local non-zero cols of this proc's seqmat into lnk */ 4966 arow = owners[rank] + i; 4967 anzi = ai[arow+1] - ai[arow]; 4968 aj = a->j + ai[arow]; 4969 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4970 bnzi += nlnk; 4971 /* add received col data into lnk */ 4972 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4973 if (i == *nextrow[k]) { /* i-th row */ 4974 anzi = *(nextai[k]+1) - *nextai[k]; 4975 aj = buf_rj[k] + *nextai[k]; 4976 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4977 bnzi += nlnk; 4978 nextrow[k]++; nextai[k]++; 4979 } 4980 } 4981 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4982 4983 /* if free space is not available, make more free space */ 4984 if (current_space->local_remaining<bnzi) { 4985 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4986 nspacedouble++; 4987 } 4988 /* copy data into free space, then initialize lnk */ 4989 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4990 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4991 4992 current_space->array += bnzi; 4993 current_space->local_used += bnzi; 4994 current_space->local_remaining -= bnzi; 4995 4996 bi[i+1] = bi[i] + bnzi; 4997 } 4998 4999 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 5000 5001 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 5002 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 5003 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 5004 5005 /* create symbolic parallel matrix B_mpi */ 5006 /*---------------------------------------*/ 5007 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 5008 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 5009 if (n==PETSC_DECIDE) { 5010 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 5011 } else { 5012 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5013 } 5014 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 5015 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 5016 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 5017 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5018 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5019 5020 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5021 B_mpi->assembled = PETSC_FALSE; 5022 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5023 merge->bi = bi; 5024 merge->bj = bj; 5025 merge->buf_ri = buf_ri; 5026 merge->buf_rj = buf_rj; 5027 merge->coi = NULL; 5028 merge->coj = NULL; 5029 merge->owners_co = NULL; 5030 5031 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5032 5033 /* attach the supporting struct to B_mpi for reuse */ 5034 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5035 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5036 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5037 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5038 *mpimat = B_mpi; 5039 5040 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5041 PetscFunctionReturn(0); 5042 } 5043 5044 /*@C 5045 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5046 matrices from each processor 5047 5048 Collective 5049 5050 Input Parameters: 5051 + comm - the communicators the parallel matrix will live on 5052 . seqmat - the input sequential matrices 5053 . m - number of local rows (or PETSC_DECIDE) 5054 . n - number of local columns (or PETSC_DECIDE) 5055 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5056 5057 Output Parameter: 5058 . mpimat - the parallel matrix generated 5059 5060 Level: advanced 5061 5062 Notes: 5063 The dimensions of the sequential matrix in each processor MUST be the same. 5064 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5065 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5066 @*/ 5067 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5068 { 5069 PetscErrorCode ierr; 5070 PetscMPIInt size; 5071 5072 PetscFunctionBegin; 5073 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5074 if (size == 1) { 5075 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5076 if (scall == MAT_INITIAL_MATRIX) { 5077 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5078 } else { 5079 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5080 } 5081 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5082 PetscFunctionReturn(0); 5083 } 5084 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5085 if (scall == MAT_INITIAL_MATRIX) { 5086 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5087 } 5088 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5089 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5090 PetscFunctionReturn(0); 5091 } 5092 5093 /*@ 5094 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5095 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5096 with MatGetSize() 5097 5098 Not Collective 5099 5100 Input Parameters: 5101 + A - the matrix 5102 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5103 5104 Output Parameter: 5105 . A_loc - the local sequential matrix generated 5106 5107 Level: developer 5108 5109 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5110 5111 @*/ 5112 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5113 { 5114 PetscErrorCode ierr; 5115 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5116 Mat_SeqAIJ *mat,*a,*b; 5117 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5118 MatScalar *aa,*ba,*cam; 5119 PetscScalar *ca; 5120 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5121 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5122 PetscBool match; 5123 MPI_Comm comm; 5124 PetscMPIInt size; 5125 5126 PetscFunctionBegin; 5127 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5128 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5129 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5130 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5131 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5132 5133 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5134 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5135 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5136 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5137 aa = a->a; ba = b->a; 5138 if (scall == MAT_INITIAL_MATRIX) { 5139 if (size == 1) { 5140 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5141 PetscFunctionReturn(0); 5142 } 5143 5144 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5145 ci[0] = 0; 5146 for (i=0; i<am; i++) { 5147 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5148 } 5149 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5150 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5151 k = 0; 5152 for (i=0; i<am; i++) { 5153 ncols_o = bi[i+1] - bi[i]; 5154 ncols_d = ai[i+1] - ai[i]; 5155 /* off-diagonal portion of A */ 5156 for (jo=0; jo<ncols_o; jo++) { 5157 col = cmap[*bj]; 5158 if (col >= cstart) break; 5159 cj[k] = col; bj++; 5160 ca[k++] = *ba++; 5161 } 5162 /* diagonal portion of A */ 5163 for (j=0; j<ncols_d; j++) { 5164 cj[k] = cstart + *aj++; 5165 ca[k++] = *aa++; 5166 } 5167 /* off-diagonal portion of A */ 5168 for (j=jo; j<ncols_o; j++) { 5169 cj[k] = cmap[*bj++]; 5170 ca[k++] = *ba++; 5171 } 5172 } 5173 /* put together the new matrix */ 5174 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5175 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5176 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5177 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5178 mat->free_a = PETSC_TRUE; 5179 mat->free_ij = PETSC_TRUE; 5180 mat->nonew = 0; 5181 } else if (scall == MAT_REUSE_MATRIX) { 5182 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5183 ci = mat->i; cj = mat->j; cam = mat->a; 5184 for (i=0; i<am; i++) { 5185 /* off-diagonal portion of A */ 5186 ncols_o = bi[i+1] - bi[i]; 5187 for (jo=0; jo<ncols_o; jo++) { 5188 col = cmap[*bj]; 5189 if (col >= cstart) break; 5190 *cam++ = *ba++; bj++; 5191 } 5192 /* diagonal portion of A */ 5193 ncols_d = ai[i+1] - ai[i]; 5194 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5195 /* off-diagonal portion of A */ 5196 for (j=jo; j<ncols_o; j++) { 5197 *cam++ = *ba++; bj++; 5198 } 5199 } 5200 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5201 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5202 PetscFunctionReturn(0); 5203 } 5204 5205 /*@C 5206 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5207 5208 Not Collective 5209 5210 Input Parameters: 5211 + A - the matrix 5212 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5213 - row, col - index sets of rows and columns to extract (or NULL) 5214 5215 Output Parameter: 5216 . A_loc - the local sequential matrix generated 5217 5218 Level: developer 5219 5220 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5221 5222 @*/ 5223 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5224 { 5225 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5226 PetscErrorCode ierr; 5227 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5228 IS isrowa,iscola; 5229 Mat *aloc; 5230 PetscBool match; 5231 5232 PetscFunctionBegin; 5233 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5234 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5235 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5236 if (!row) { 5237 start = A->rmap->rstart; end = A->rmap->rend; 5238 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5239 } else { 5240 isrowa = *row; 5241 } 5242 if (!col) { 5243 start = A->cmap->rstart; 5244 cmap = a->garray; 5245 nzA = a->A->cmap->n; 5246 nzB = a->B->cmap->n; 5247 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5248 ncols = 0; 5249 for (i=0; i<nzB; i++) { 5250 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5251 else break; 5252 } 5253 imark = i; 5254 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5255 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5256 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5257 } else { 5258 iscola = *col; 5259 } 5260 if (scall != MAT_INITIAL_MATRIX) { 5261 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5262 aloc[0] = *A_loc; 5263 } 5264 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5265 if (!col) { /* attach global id of condensed columns */ 5266 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5267 } 5268 *A_loc = aloc[0]; 5269 ierr = PetscFree(aloc);CHKERRQ(ierr); 5270 if (!row) { 5271 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5272 } 5273 if (!col) { 5274 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5275 } 5276 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5277 PetscFunctionReturn(0); 5278 } 5279 5280 /* 5281 * Destroy a mat that may be composed with PetscSF communication objects. 5282 * The SF objects were created in MatCreateSeqSubMatrixWithRows_Private. 5283 * */ 5284 PetscErrorCode MatDestroy_SeqAIJ_PetscSF(Mat mat) 5285 { 5286 PetscSF sf,osf; 5287 IS map; 5288 PetscErrorCode ierr; 5289 5290 PetscFunctionBegin; 5291 ierr = PetscObjectQuery((PetscObject)mat,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5292 ierr = PetscObjectQuery((PetscObject)mat,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5293 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5294 ierr = PetscSFDestroy(&osf);CHKERRQ(ierr); 5295 ierr = PetscObjectQuery((PetscObject)mat,"aoffdiagtopothmapping",(PetscObject*)&map);CHKERRQ(ierr); 5296 ierr = ISDestroy(&map);CHKERRQ(ierr); 5297 ierr = MatDestroy_SeqAIJ(mat);CHKERRQ(ierr); 5298 PetscFunctionReturn(0); 5299 } 5300 5301 /* 5302 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5303 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5304 * on a global size. 5305 * */ 5306 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth) 5307 { 5308 Mat_MPIAIJ *p=(Mat_MPIAIJ*)P->data; 5309 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth; 5310 PetscInt plocalsize,nrows,*ilocal,*oilocal,i,owner,lidx,*nrcols,*nlcols,ncol; 5311 PetscSFNode *iremote,*oiremote; 5312 const PetscInt *lrowindices; 5313 PetscErrorCode ierr; 5314 PetscSF sf,osf; 5315 PetscInt pcstart,*roffsets,*loffsets,*pnnz,j; 5316 PetscInt ontotalcols,dntotalcols,ntotalcols,nout; 5317 MPI_Comm comm; 5318 ISLocalToGlobalMapping mapping; 5319 5320 PetscFunctionBegin; 5321 ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr); 5322 /* plocalsize is the number of roots 5323 * nrows is the number of leaves 5324 * */ 5325 ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr); 5326 ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr); 5327 ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr); 5328 ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr); 5329 for (i=0;i<nrows;i++) { 5330 /* Find a remote index and an owner for a row 5331 * The row could be local or remote 5332 * */ 5333 owner = 0; 5334 lidx = 0; 5335 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr); 5336 iremote[i].index = lidx; 5337 iremote[i].rank = owner; 5338 } 5339 /* Create SF to communicate how many nonzero columns for each row */ 5340 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5341 /* SF will figure out the number of nonzero colunms for each row, and their 5342 * offsets 5343 * */ 5344 ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5345 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5346 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5347 5348 ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr); 5349 ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr); 5350 ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr); 5351 roffsets[0] = 0; 5352 roffsets[1] = 0; 5353 for (i=0;i<plocalsize;i++) { 5354 /* diag */ 5355 nrcols[i*2+0] = pd->i[i+1] - pd->i[i]; 5356 /* off diag */ 5357 nrcols[i*2+1] = po->i[i+1] - po->i[i]; 5358 /* compute offsets so that we relative location for each row */ 5359 roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0]; 5360 roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1]; 5361 } 5362 ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr); 5363 ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr); 5364 /* 'r' means root, and 'l' means leaf */ 5365 ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5366 ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5367 ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr); 5368 ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr); 5369 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 5370 ierr = PetscFree(roffsets);CHKERRQ(ierr); 5371 ierr = PetscFree(nrcols);CHKERRQ(ierr); 5372 dntotalcols = 0; 5373 ontotalcols = 0; 5374 ncol = 0; 5375 for (i=0;i<nrows;i++) { 5376 pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1]; 5377 ncol = PetscMax(pnnz[i],ncol); 5378 /* diag */ 5379 dntotalcols += nlcols[i*2+0]; 5380 /* off diag */ 5381 ontotalcols += nlcols[i*2+1]; 5382 } 5383 /* We do not need to figure the right number of columns 5384 * since all the calculations will be done by going through the raw data 5385 * */ 5386 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr); 5387 ierr = MatSetUp(*P_oth);CHKERRQ(ierr); 5388 ierr = PetscFree(pnnz);CHKERRQ(ierr); 5389 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5390 /* diag */ 5391 ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr); 5392 /* off diag */ 5393 ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr); 5394 /* diag */ 5395 ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr); 5396 /* off diag */ 5397 ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr); 5398 dntotalcols = 0; 5399 ontotalcols = 0; 5400 ntotalcols = 0; 5401 for (i=0;i<nrows;i++) { 5402 owner = 0; 5403 ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr); 5404 /* Set iremote for diag matrix */ 5405 for (j=0;j<nlcols[i*2+0];j++) { 5406 iremote[dntotalcols].index = loffsets[i*2+0] + j; 5407 iremote[dntotalcols].rank = owner; 5408 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5409 ilocal[dntotalcols++] = ntotalcols++; 5410 } 5411 /* off diag */ 5412 for (j=0;j<nlcols[i*2+1];j++) { 5413 oiremote[ontotalcols].index = loffsets[i*2+1] + j; 5414 oiremote[ontotalcols].rank = owner; 5415 oilocal[ontotalcols++] = ntotalcols++; 5416 } 5417 } 5418 ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr); 5419 ierr = PetscFree(loffsets);CHKERRQ(ierr); 5420 ierr = PetscFree(nlcols);CHKERRQ(ierr); 5421 ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr); 5422 /* P serves as roots and P_oth is leaves 5423 * Diag matrix 5424 * */ 5425 ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5426 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 5427 ierr = PetscSFSetUp(sf);CHKERRQ(ierr); 5428 5429 ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr); 5430 /* Off diag */ 5431 ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr); 5432 ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr); 5433 ierr = PetscSFSetUp(osf);CHKERRQ(ierr); 5434 /* We operate on the matrix internal data for saving memory */ 5435 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5436 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5437 ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr); 5438 /* Convert to global indices for diag matrix */ 5439 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart; 5440 ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5441 /* We want P_oth store global indices */ 5442 ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr); 5443 /* Use memory scalable approach */ 5444 ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr); 5445 ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr); 5446 ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5447 ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr); 5448 /* Convert back to local indices */ 5449 for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart; 5450 ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr); 5451 nout = 0; 5452 ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr); 5453 if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout); 5454 ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr); 5455 /* Exchange values */ 5456 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5457 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5458 /* Stop PETSc from shrinking memory */ 5459 for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i]; 5460 ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5461 ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5462 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5463 ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr); 5464 ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr); 5465 /* ``New MatDestroy" takes care of PetscSF objects as well */ 5466 (*P_oth)->ops->destroy = MatDestroy_SeqAIJ_PetscSF; 5467 PetscFunctionReturn(0); 5468 } 5469 5470 /* 5471 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5472 * This supports MPIAIJ and MAIJ 5473 * */ 5474 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth) 5475 { 5476 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data; 5477 Mat_SeqAIJ *p_oth; 5478 Mat_SeqAIJ *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data; 5479 IS rows,map; 5480 PetscHMapI hamp; 5481 PetscInt i,htsize,*rowindices,off,*mapping,key,count; 5482 MPI_Comm comm; 5483 PetscSF sf,osf; 5484 PetscBool has; 5485 PetscErrorCode ierr; 5486 5487 PetscFunctionBegin; 5488 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5489 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5490 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5491 * and then create a submatrix (that often is an overlapping matrix) 5492 * */ 5493 if (reuse==MAT_INITIAL_MATRIX) { 5494 /* Use a hash table to figure out unique keys */ 5495 ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr); 5496 ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr); 5497 ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr); 5498 count = 0; 5499 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5500 for (i=0;i<a->B->cmap->n;i++) { 5501 key = a->garray[i]/dof; 5502 ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr); 5503 if (!has) { 5504 mapping[i] = count; 5505 ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr); 5506 } else { 5507 /* Current 'i' has the same value the previous step */ 5508 mapping[i] = count-1; 5509 } 5510 } 5511 ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr); 5512 ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr); 5513 if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr); 5514 ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr); 5515 off = 0; 5516 ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr); 5517 ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr); 5518 ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr); 5519 ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr); 5520 /* In case, the matrix was already created but users want to recreate the matrix */ 5521 ierr = MatDestroy(P_oth);CHKERRQ(ierr); 5522 ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr); 5523 ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr); 5524 ierr = ISDestroy(&rows);CHKERRQ(ierr); 5525 } else if (reuse==MAT_REUSE_MATRIX) { 5526 /* If matrix was already created, we simply update values using SF objects 5527 * that as attached to the matrix ealier. 5528 * */ 5529 ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr); 5530 ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr); 5531 if (!sf || !osf) { 5532 SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet \n"); 5533 } 5534 p_oth = (Mat_SeqAIJ*) (*P_oth)->data; 5535 /* Update values in place */ 5536 ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5537 ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5538 ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr); 5539 ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr); 5540 } else { 5541 SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type \n"); 5542 } 5543 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr); 5544 PetscFunctionReturn(0); 5545 } 5546 5547 /*@C 5548 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5549 5550 Collective on Mat 5551 5552 Input Parameters: 5553 + A,B - the matrices in mpiaij format 5554 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5555 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5556 5557 Output Parameter: 5558 + rowb, colb - index sets of rows and columns of B to extract 5559 - B_seq - the sequential matrix generated 5560 5561 Level: developer 5562 5563 @*/ 5564 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5565 { 5566 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5567 PetscErrorCode ierr; 5568 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5569 IS isrowb,iscolb; 5570 Mat *bseq=NULL; 5571 5572 PetscFunctionBegin; 5573 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5574 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5575 } 5576 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5577 5578 if (scall == MAT_INITIAL_MATRIX) { 5579 start = A->cmap->rstart; 5580 cmap = a->garray; 5581 nzA = a->A->cmap->n; 5582 nzB = a->B->cmap->n; 5583 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5584 ncols = 0; 5585 for (i=0; i<nzB; i++) { /* row < local row index */ 5586 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5587 else break; 5588 } 5589 imark = i; 5590 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5591 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5592 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5593 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5594 } else { 5595 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5596 isrowb = *rowb; iscolb = *colb; 5597 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5598 bseq[0] = *B_seq; 5599 } 5600 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5601 *B_seq = bseq[0]; 5602 ierr = PetscFree(bseq);CHKERRQ(ierr); 5603 if (!rowb) { 5604 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5605 } else { 5606 *rowb = isrowb; 5607 } 5608 if (!colb) { 5609 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5610 } else { 5611 *colb = iscolb; 5612 } 5613 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5614 PetscFunctionReturn(0); 5615 } 5616 5617 /* 5618 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5619 of the OFF-DIAGONAL portion of local A 5620 5621 Collective on Mat 5622 5623 Input Parameters: 5624 + A,B - the matrices in mpiaij format 5625 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5626 5627 Output Parameter: 5628 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5629 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5630 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5631 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5632 5633 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5634 for this matrix. This is not desirable.. 5635 5636 Level: developer 5637 5638 */ 5639 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5640 { 5641 PetscErrorCode ierr; 5642 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5643 Mat_SeqAIJ *b_oth; 5644 VecScatter ctx; 5645 MPI_Comm comm; 5646 const PetscMPIInt *rprocs,*sprocs; 5647 const PetscInt *srow,*rstarts,*sstarts; 5648 PetscInt *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5649 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5650 PetscScalar *b_otha,*bufa,*bufA,*vals = NULL; 5651 MPI_Request *rwaits = NULL,*swaits = NULL; 5652 MPI_Status rstatus; 5653 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5654 5655 PetscFunctionBegin; 5656 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5657 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5658 5659 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5660 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5661 } 5662 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5663 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5664 5665 if (size == 1) { 5666 startsj_s = NULL; 5667 bufa_ptr = NULL; 5668 *B_oth = NULL; 5669 PetscFunctionReturn(0); 5670 } 5671 5672 ctx = a->Mvctx; 5673 tag = ((PetscObject)ctx)->tag; 5674 5675 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5676 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5677 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5678 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5679 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5680 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5681 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5682 5683 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5684 if (scall == MAT_INITIAL_MATRIX) { 5685 /* i-array */ 5686 /*---------*/ 5687 /* post receives */ 5688 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5689 for (i=0; i<nrecvs; i++) { 5690 rowlen = rvalues + rstarts[i]*rbs; 5691 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5692 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5693 } 5694 5695 /* pack the outgoing message */ 5696 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5697 5698 sstartsj[0] = 0; 5699 rstartsj[0] = 0; 5700 len = 0; /* total length of j or a array to be sent */ 5701 if (nsends) { 5702 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5703 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5704 } 5705 for (i=0; i<nsends; i++) { 5706 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5707 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5708 for (j=0; j<nrows; j++) { 5709 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5710 for (l=0; l<sbs; l++) { 5711 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5712 5713 rowlen[j*sbs+l] = ncols; 5714 5715 len += ncols; 5716 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5717 } 5718 k++; 5719 } 5720 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5721 5722 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5723 } 5724 /* recvs and sends of i-array are completed */ 5725 i = nrecvs; 5726 while (i--) { 5727 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5728 } 5729 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5730 ierr = PetscFree(svalues);CHKERRQ(ierr); 5731 5732 /* allocate buffers for sending j and a arrays */ 5733 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5734 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5735 5736 /* create i-array of B_oth */ 5737 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5738 5739 b_othi[0] = 0; 5740 len = 0; /* total length of j or a array to be received */ 5741 k = 0; 5742 for (i=0; i<nrecvs; i++) { 5743 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5744 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5745 for (j=0; j<nrows; j++) { 5746 b_othi[k+1] = b_othi[k] + rowlen[j]; 5747 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5748 k++; 5749 } 5750 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5751 } 5752 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5753 5754 /* allocate space for j and a arrrays of B_oth */ 5755 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5756 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5757 5758 /* j-array */ 5759 /*---------*/ 5760 /* post receives of j-array */ 5761 for (i=0; i<nrecvs; i++) { 5762 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5763 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5764 } 5765 5766 /* pack the outgoing message j-array */ 5767 if (nsends) k = sstarts[0]; 5768 for (i=0; i<nsends; i++) { 5769 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5770 bufJ = bufj+sstartsj[i]; 5771 for (j=0; j<nrows; j++) { 5772 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5773 for (ll=0; ll<sbs; ll++) { 5774 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5775 for (l=0; l<ncols; l++) { 5776 *bufJ++ = cols[l]; 5777 } 5778 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5779 } 5780 } 5781 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5782 } 5783 5784 /* recvs and sends of j-array are completed */ 5785 i = nrecvs; 5786 while (i--) { 5787 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5788 } 5789 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5790 } else if (scall == MAT_REUSE_MATRIX) { 5791 sstartsj = *startsj_s; 5792 rstartsj = *startsj_r; 5793 bufa = *bufa_ptr; 5794 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5795 b_otha = b_oth->a; 5796 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5797 5798 /* a-array */ 5799 /*---------*/ 5800 /* post receives of a-array */ 5801 for (i=0; i<nrecvs; i++) { 5802 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5803 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5804 } 5805 5806 /* pack the outgoing message a-array */ 5807 if (nsends) k = sstarts[0]; 5808 for (i=0; i<nsends; i++) { 5809 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5810 bufA = bufa+sstartsj[i]; 5811 for (j=0; j<nrows; j++) { 5812 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5813 for (ll=0; ll<sbs; ll++) { 5814 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5815 for (l=0; l<ncols; l++) { 5816 *bufA++ = vals[l]; 5817 } 5818 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5819 } 5820 } 5821 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5822 } 5823 /* recvs and sends of a-array are completed */ 5824 i = nrecvs; 5825 while (i--) { 5826 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5827 } 5828 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5829 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5830 5831 if (scall == MAT_INITIAL_MATRIX) { 5832 /* put together the new matrix */ 5833 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5834 5835 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5836 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5837 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5838 b_oth->free_a = PETSC_TRUE; 5839 b_oth->free_ij = PETSC_TRUE; 5840 b_oth->nonew = 0; 5841 5842 ierr = PetscFree(bufj);CHKERRQ(ierr); 5843 if (!startsj_s || !bufa_ptr) { 5844 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5845 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5846 } else { 5847 *startsj_s = sstartsj; 5848 *startsj_r = rstartsj; 5849 *bufa_ptr = bufa; 5850 } 5851 } 5852 5853 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5854 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5855 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5856 PetscFunctionReturn(0); 5857 } 5858 5859 /*@C 5860 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5861 5862 Not Collective 5863 5864 Input Parameters: 5865 . A - The matrix in mpiaij format 5866 5867 Output Parameter: 5868 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5869 . colmap - A map from global column index to local index into lvec 5870 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5871 5872 Level: developer 5873 5874 @*/ 5875 #if defined(PETSC_USE_CTABLE) 5876 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5877 #else 5878 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5879 #endif 5880 { 5881 Mat_MPIAIJ *a; 5882 5883 PetscFunctionBegin; 5884 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5885 PetscValidPointer(lvec, 2); 5886 PetscValidPointer(colmap, 3); 5887 PetscValidPointer(multScatter, 4); 5888 a = (Mat_MPIAIJ*) A->data; 5889 if (lvec) *lvec = a->lvec; 5890 if (colmap) *colmap = a->colmap; 5891 if (multScatter) *multScatter = a->Mvctx; 5892 PetscFunctionReturn(0); 5893 } 5894 5895 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5896 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5897 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5898 #if defined(PETSC_HAVE_MKL_SPARSE) 5899 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5900 #endif 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5902 #if defined(PETSC_HAVE_ELEMENTAL) 5903 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5904 #endif 5905 #if defined(PETSC_HAVE_HYPRE) 5906 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5907 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5908 #endif 5909 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5910 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5911 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5912 5913 /* 5914 Computes (B'*A')' since computing B*A directly is untenable 5915 5916 n p p 5917 ( ) ( ) ( ) 5918 m ( A ) * n ( B ) = m ( C ) 5919 ( ) ( ) ( ) 5920 5921 */ 5922 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5923 { 5924 PetscErrorCode ierr; 5925 Mat At,Bt,Ct; 5926 5927 PetscFunctionBegin; 5928 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5929 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5930 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5931 ierr = MatDestroy(&At);CHKERRQ(ierr); 5932 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5933 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5934 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5935 PetscFunctionReturn(0); 5936 } 5937 5938 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5939 { 5940 PetscErrorCode ierr; 5941 PetscInt m=A->rmap->n,n=B->cmap->n; 5942 Mat Cmat; 5943 5944 PetscFunctionBegin; 5945 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5946 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5947 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5948 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5949 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5950 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5951 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5952 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5953 5954 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5955 5956 *C = Cmat; 5957 PetscFunctionReturn(0); 5958 } 5959 5960 /* ----------------------------------------------------------------*/ 5961 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5962 { 5963 PetscErrorCode ierr; 5964 5965 PetscFunctionBegin; 5966 if (scall == MAT_INITIAL_MATRIX) { 5967 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5968 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5969 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5970 } 5971 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5972 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5973 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5974 PetscFunctionReturn(0); 5975 } 5976 5977 /*MC 5978 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5979 5980 Options Database Keys: 5981 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5982 5983 Level: beginner 5984 5985 Notes: 5986 MatSetValues() may be called for this matrix type with a NULL argument for the numerical values, 5987 in this case the values associated with the rows and columns one passes in are set to zero 5988 in the matrix 5989 5990 MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no 5991 space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored 5992 5993 .seealso: MatCreateAIJ() 5994 M*/ 5995 5996 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5997 { 5998 Mat_MPIAIJ *b; 5999 PetscErrorCode ierr; 6000 PetscMPIInt size; 6001 6002 PetscFunctionBegin; 6003 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 6004 6005 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 6006 B->data = (void*)b; 6007 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 6008 B->assembled = PETSC_FALSE; 6009 B->insertmode = NOT_SET_VALUES; 6010 b->size = size; 6011 6012 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 6013 6014 /* build cache for off array entries formed */ 6015 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 6016 6017 b->donotstash = PETSC_FALSE; 6018 b->colmap = 0; 6019 b->garray = 0; 6020 b->roworiented = PETSC_TRUE; 6021 6022 /* stuff used for matrix vector multiply */ 6023 b->lvec = NULL; 6024 b->Mvctx = NULL; 6025 6026 /* stuff for MatGetRow() */ 6027 b->rowindices = 0; 6028 b->rowvalues = 0; 6029 b->getrowactive = PETSC_FALSE; 6030 6031 /* flexible pointer used in CUSP/CUSPARSE classes */ 6032 b->spptr = NULL; 6033 6034 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 6035 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 6036 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 6037 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 6038 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 6039 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 6040 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 6041 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 6042 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 6043 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 6044 #if defined(PETSC_HAVE_MKL_SPARSE) 6045 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 6046 #endif 6047 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 6048 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 6049 #if defined(PETSC_HAVE_ELEMENTAL) 6050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 6051 #endif 6052 #if defined(PETSC_HAVE_HYPRE) 6053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 6054 #endif 6055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 6056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 6057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 6058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 6059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 6060 #if defined(PETSC_HAVE_HYPRE) 6061 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 6062 #endif 6063 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 6064 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 6065 PetscFunctionReturn(0); 6066 } 6067 6068 /*@C 6069 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 6070 and "off-diagonal" part of the matrix in CSR format. 6071 6072 Collective 6073 6074 Input Parameters: 6075 + comm - MPI communicator 6076 . m - number of local rows (Cannot be PETSC_DECIDE) 6077 . n - This value should be the same as the local size used in creating the 6078 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 6079 calculated if N is given) For square matrices n is almost always m. 6080 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 6081 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 6082 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6083 . j - column indices 6084 . a - matrix values 6085 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6086 . oj - column indices 6087 - oa - matrix values 6088 6089 Output Parameter: 6090 . mat - the matrix 6091 6092 Level: advanced 6093 6094 Notes: 6095 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6096 must free the arrays once the matrix has been destroyed and not before. 6097 6098 The i and j indices are 0 based 6099 6100 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6101 6102 This sets local rows and cannot be used to set off-processor values. 6103 6104 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6105 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6106 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6107 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6108 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 6109 communication if it is known that only local entries will be set. 6110 6111 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 6112 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 6113 @*/ 6114 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 6115 { 6116 PetscErrorCode ierr; 6117 Mat_MPIAIJ *maij; 6118 6119 PetscFunctionBegin; 6120 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6121 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 6122 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 6123 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 6124 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 6125 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 6126 maij = (Mat_MPIAIJ*) (*mat)->data; 6127 6128 (*mat)->preallocated = PETSC_TRUE; 6129 6130 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 6131 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 6132 6133 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 6134 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 6135 6136 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6137 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6138 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6139 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6140 6141 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 6142 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6143 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6144 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 6145 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6146 PetscFunctionReturn(0); 6147 } 6148 6149 /* 6150 Special version for direct calls from Fortran 6151 */ 6152 #include <petsc/private/fortranimpl.h> 6153 6154 /* Change these macros so can be used in void function */ 6155 #undef CHKERRQ 6156 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 6157 #undef SETERRQ2 6158 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 6159 #undef SETERRQ3 6160 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 6161 #undef SETERRQ 6162 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 6163 6164 #if defined(PETSC_HAVE_FORTRAN_CAPS) 6165 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 6166 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 6167 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 6168 #else 6169 #endif 6170 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 6171 { 6172 Mat mat = *mmat; 6173 PetscInt m = *mm, n = *mn; 6174 InsertMode addv = *maddv; 6175 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 6176 PetscScalar value; 6177 PetscErrorCode ierr; 6178 6179 MatCheckPreallocated(mat,1); 6180 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 6181 6182 #if defined(PETSC_USE_DEBUG) 6183 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 6184 #endif 6185 { 6186 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 6187 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 6188 PetscBool roworiented = aij->roworiented; 6189 6190 /* Some Variables required in the macro */ 6191 Mat A = aij->A; 6192 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 6193 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 6194 MatScalar *aa = a->a; 6195 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 6196 Mat B = aij->B; 6197 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 6198 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 6199 MatScalar *ba = b->a; 6200 6201 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 6202 PetscInt nonew = a->nonew; 6203 MatScalar *ap1,*ap2; 6204 6205 PetscFunctionBegin; 6206 for (i=0; i<m; i++) { 6207 if (im[i] < 0) continue; 6208 #if defined(PETSC_USE_DEBUG) 6209 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 6210 #endif 6211 if (im[i] >= rstart && im[i] < rend) { 6212 row = im[i] - rstart; 6213 lastcol1 = -1; 6214 rp1 = aj + ai[row]; 6215 ap1 = aa + ai[row]; 6216 rmax1 = aimax[row]; 6217 nrow1 = ailen[row]; 6218 low1 = 0; 6219 high1 = nrow1; 6220 lastcol2 = -1; 6221 rp2 = bj + bi[row]; 6222 ap2 = ba + bi[row]; 6223 rmax2 = bimax[row]; 6224 nrow2 = bilen[row]; 6225 low2 = 0; 6226 high2 = nrow2; 6227 6228 for (j=0; j<n; j++) { 6229 if (roworiented) value = v[i*n+j]; 6230 else value = v[i+j*m]; 6231 if (in[j] >= cstart && in[j] < cend) { 6232 col = in[j] - cstart; 6233 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 6234 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 6235 } else if (in[j] < 0) continue; 6236 #if defined(PETSC_USE_DEBUG) 6237 /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */ 6238 else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);} 6239 #endif 6240 else { 6241 if (mat->was_assembled) { 6242 if (!aij->colmap) { 6243 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 6244 } 6245 #if defined(PETSC_USE_CTABLE) 6246 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 6247 col--; 6248 #else 6249 col = aij->colmap[in[j]] - 1; 6250 #endif 6251 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 6252 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 6253 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 6254 col = in[j]; 6255 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 6256 B = aij->B; 6257 b = (Mat_SeqAIJ*)B->data; 6258 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 6259 rp2 = bj + bi[row]; 6260 ap2 = ba + bi[row]; 6261 rmax2 = bimax[row]; 6262 nrow2 = bilen[row]; 6263 low2 = 0; 6264 high2 = nrow2; 6265 bm = aij->B->rmap->n; 6266 ba = b->a; 6267 } 6268 } else col = in[j]; 6269 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 6270 } 6271 } 6272 } else if (!aij->donotstash) { 6273 if (roworiented) { 6274 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6275 } else { 6276 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 6277 } 6278 } 6279 } 6280 } 6281 PetscFunctionReturnVoid(); 6282 } 6283