#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/vecscatterimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL;
    the type also automatically switches over to use inodes when enough exist.

  Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL
M*/
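/*
   Usage sketch (illustrative only, not part of the man pages above): the
   "call both preallocation routines" idiom the MATAIJ documentation recommends.
   Whichever routine does not match the actual run-time type is ignored, so the
   same code works on one process or many. The counts 5 and 2 are made-up
   estimates; M and N are assumed global sizes.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);        used with one process
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr); used with several
*/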

PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  /* cnt counts the locally zero rows */
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  /* if no process has a zero row, leave *keptrows NULL: all rows are kept */
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*) Y->data;
  PetscBool      cong;

  PetscFunctionBegin;
  ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
  if (Y->assembled && cong) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }

  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}
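/*
   Usage sketch (illustrative): MatGetColumnNorms() fills a caller-provided array
   with one entry per *global* column, identical on every process after the
   Allreduce above, which is why the routine allocates its work array with the
   global column count.

     PetscReal *norms;
     PetscInt   N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/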

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
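/*
   The routine above merges two index lists with the standard PETSc idiom:
   concatenate, then PetscSortRemoveDupsInt() to sort and strip duplicates in
   place. A minimal sketch of the idiom on its own (array contents made up):

     PetscInt n = 6, iis[6] = {3,1,4,1,5,3};
     ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);   n becomes 4: {1,3,4,5}
*/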

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processes */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine the number of diagonal and off-diagonal entries per row */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0 */
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off-diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
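/*
   Usage sketch (illustrative; this is an internal routine, exposed only because a
   preconditioner needs it): gmat is a square MATSEQAIJ whose data is significant
   on process 0, and every process receives its m local rows. With
   MAT_REUSE_MATRIX only the numerical values are moved; the structure is kept.

     Mat dist = NULL;
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_INITIAL_MATRIX,&dist);CHKERRQ(ierr);
     ... update values of gmat on process 0 ...
     ierr = MatDistribute_MPIAIJ(comm,gmat,m,MAT_REUSE_MATRIX,&dist);CHKERRQ(ierr);
*/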

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it, it is not scalable (each
  process has an order-N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
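/*
   The colmap convention used throughout this file: entries are stored shifted
   by +1 so that 0 (the PetscTable / calloc default) means "this global column
   has no slot in the off-diagonal part". Every lookup therefore subtracts 1 and
   treats a negative result as absent. Sketch of the lookup (gcol and lcol are
   hypothetical names for a global column index and its local slot):

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) ... the column is not present in B ...
*/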

#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure whether PetscLogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}
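/*
   Both macros above implement the same sorted-row insertion technique: bisect
   the search window down to at most 5 entries, finish with a linear scan, and
   on a miss (when insertion is allowed) shift the tail of the row up one slot.
   A free-standing sketch of just the search step (function name and arguments
   are made up for illustration):

     static PetscInt FindInSortedRow(const PetscInt *rp,PetscInt nrow,PetscInt col)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 5) {
         t = (low+high)/2;
         if (rp[t] > col) high = t;
         else low = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= col) break;
       }
       return i;    index where col lives, or where it must be inserted
     }
*/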

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) {
              if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
                ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
              } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
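/*
   Usage sketch (illustrative): callers of MatSetValues() may set rows they do
   not own; such entries are placed in the stash above and communicated during
   assembly, so every sequence of MatSetValues() calls must be closed with the
   assembly pair before the matrix is used.

     ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);   row may be off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/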

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ*)mat->data;
  Mat        A       = aij->A; /* diagonal part of the matrix */
  Mat        B       = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ*)B->data;
  PetscInt   cstart  = mat->cmap->rstart,cend = mat->cmap->rend,col;
  PetscInt   *ailen  = a->ilen,*aj = a->j;
  PetscInt   *bilen  = b->ilen,*bj = b->j;
  PetscInt   am      = aij->A->rmap->n,j;
  PetscInt   diag_so_far = 0,dnz;
  PetscInt   offd_so_far = 0,onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}
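/*
   The splitting rule shared by both CopyFromCSRFormat routines: a global column
   c belongs to the diagonal block iff cstart <= c < cend, and is stored locally
   as c - cstart; every other column goes into the off-diagonal block under its
   global index (compacted later). Sketch for a single row (colidx, row_begin,
   row_end, dnz, onz are hypothetical names mirroring the code):

     for (k=row_begin; k<row_end; k++) {
       if (colidx[k] >= cstart && colidx[k] < cend) aj[dnz++] = colidx[k] - cstart;
       else                                         bj[onz++] = colidx[k];
     }
*/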

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
    Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
    would not hold and the more general MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ*)mat->data;
  Mat         A     = aij->A; /* diagonal part of the matrix */
  Mat         B     = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ*)(aij->A)->data,*aijo = (Mat_SeqAIJ*)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ*)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ*)B->data;
  PetscInt    cstart = mat->cmap->rstart,cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen,*aj = a->j;
  PetscInt    *bilen = b->ilen,*bj = b->j;
  PetscInt    am     = aij->A->rmap->n,j;
  PetscInt    *full_diag_i = aijd->i,*full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
  PetscScalar *aa = a->a,*ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j=0; j<am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col=mat_i[j]; col<mat_i[j+1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag+dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd+onz_row] = mat_j[col];
        ba[rowstart_offd+onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
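/*
   The Begin/End split of assembly exists so the stash communication started
   above can overlap unrelated user computation. Usage sketch (illustrative):

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ... unrelated local work while stash messages are in flight ...
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/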

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    /* MPI_PROD of the was_assembled flags is true only if every process is still assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ       *mat = (Mat_MPIAIJ *) A->data;
  PetscObjectState sA, sB;
  PetscInt         *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;
  PetscErrorCode   ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
    PetscInt   nnwA, nnwB;
    PetscBool  nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
      aijA->nonew = 0;
    }
    if (!nnzB) {
      ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  /* reduce nonzerostate */
  lch  = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}
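/*
   Usage sketch (illustrative): zero a set of global rows, place 4.0 on their
   diagonal, and fix the right-hand side so each zeroed equation reads
   4*x_r = b_r, consistent with the bb[lrows[r]] = diag*xx[lrows[r]] fixup above.

     PetscInt rows[2] = {0,7};
     ierr = MatZeroRows(A,2,rows,4.0,x,b);CHKERRQ(ierr);
*/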

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off-diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
    if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off-diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns */
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
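/*
   The ownership communication at the top of the routine above is the generic
   PetscSF pattern: build a star forest whose leaves are the caller's (possibly
   off-process) row indices and whose roots are locally owned rows, then reduce
   leaf data onto the roots. Skeleton of the pattern (illustrative; nroots,
   nleaves, remotes, leafdata, rootdata are assumed names):

     ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
     ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);CHKERRQ(ierr);
     ierr = PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,MPI_LOR);CHKERRQ(ierr);
     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/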

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);

  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
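/*
   MatMult above is the canonical overlap pattern for a distributed sparse
   matrix-vector product: start the halo exchange, multiply by the diagonal
   block while the ghost values travel, finish the exchange, then fold in the
   off-diagonal block. Schematic (A_diag and A_offdiag stand for a->A and a->B):

     VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);   post halo exchange
     MatMult(A_diag,x,y);                                           overlap: local block
     VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);     wait for ghost values
     MatMultAdd(A_offdiag,lvec,y,y);                                y += B * ghosts
*/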

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  VecScatter     Mvctx = a->Mvctx;

  PetscFunctionBegin;
  if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1;
  ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscBool      lf;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* add partial results together */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);}
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* this processor needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
      ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
      ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      for (i=0; i<(PetscInt)size; i++) {
        nmax = PetscMax(nmax,nz[i]);
        nmin = PetscMin(nmin,nz[i]);
        navg += nz[i];
      }
      ierr = PetscFree(nz);CHKERRQ(ierr);
      navg = navg/size;
      ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;
MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1430 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1431 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1432 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1433 if (!inodes) { 1434 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1435 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1436 } else { 1437 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1438 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1439 } 1440 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1441 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1442 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1443 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1444 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1445 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1446 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1447 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1448 PetscFunctionReturn(0); 1449 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1450 PetscInt inodecount,inodelimit,*inodes; 1451 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1452 if (inodes) { 1453 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1454 } else { 1455 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1456 } 1457 PetscFunctionReturn(0); 1458 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1459 PetscFunctionReturn(0); 1460 } 1461 } else if (isbinary) { 1462 if (size == 1) { 1463 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1464 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1465 } else { 1466 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1467 } 1468 PetscFunctionReturn(0); 1469 } else if (iascii && size == 1) { 1470 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1471 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1472 PetscFunctionReturn(0); 1473 } else if (isdraw) { 1474 PetscDraw draw; 1475 PetscBool isnull; 1476 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1477 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1478 if (isnull) PetscFunctionReturn(0); 1479 } 1480 1481 { /* assemble the entire matrix onto first processor */ 1482 Mat A = NULL, Av; 1483 IS isrow,iscol; 1484 1485 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1486 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1487 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1488 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1489 /* The commented code uses MatCreateSubMatrices instead */ 1490 /* 1491 Mat *AA, A = NULL, Av; 1492 IS isrow,iscol; 1493 1494 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1495 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1496 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1497 if (!rank) { 1498 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1499 A = AA[0]; 1500 Av = AA[0]; 1501 } 1502 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1503 */ 1504 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1505 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1506 /* 1507 Everyone has to call to draw the matrix since the graphics waits are 1508 synchronized across all processors that share the PetscDraw object 1509 */ 1510 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1511 if (!rank) { 1512 if (((PetscObject)mat)->name) { 1513 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1514 } 1515 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1516 } 1517 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1518 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1519 ierr = MatDestroy(&A);CHKERRQ(ierr); 1520 } 1521 PetscFunctionReturn(0); 1522 } 1523 1524 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1525 { 1526 PetscErrorCode ierr; 1527 PetscBool iascii,isdraw,issocket,isbinary; 1528 1529 PetscFunctionBegin; 1530 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1531 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1532 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1533 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1534 if (iascii || isdraw || isbinary || issocket) { 1535 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1536 } 1537 PetscFunctionReturn(0); 1538 } 1539 1540 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1541 { 1542 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1543 PetscErrorCode ierr; 1544 Vec bb1 = 0; 1545 PetscBool hasop; 1546 1547 PetscFunctionBegin; 1548 if (flag == SOR_APPLY_UPPER) { 1549 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1550 PetscFunctionReturn(0); 1551 } 1552 1553 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1554 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1555 } 1556 1557 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1558 if (flag & SOR_ZERO_INITIAL_GUESS) { 1559 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1560 its--; 1561 } 1562 1563 while (its--) { 1564 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1565 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 1567 /* update rhs: bb1 = bb - B*x */ 1568 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1569 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1570 1571 
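/* At this point bb1 = bb - B*x(ghost) holds the right-hand side with the off-process coupling moved over, so the sweep below relaxes only the diagonal block: each outer iteration is block Jacobi between processes combined with SOR within each process */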
/* local sweep */ 1572 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1573 } 1574 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1575 if (flag & SOR_ZERO_INITIAL_GUESS) { 1576 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1577 its--; 1578 } 1579 while (its--) { 1580 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1581 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1582 1583 /* update rhs: bb1 = bb - B*x */ 1584 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1585 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1586 1587 /* local sweep */ 1588 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1589 } 1590 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1591 if (flag & SOR_ZERO_INITIAL_GUESS) { 1592 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1593 its--; 1594 } 1595 while (its--) { 1596 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1597 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1598 1599 /* update rhs: bb1 = bb - B*x */ 1600 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1601 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1602 1603 /* local sweep */ 1604 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1605 } 1606 } else if (flag & SOR_EISENSTAT) { 1607 Vec xx1; 1608 1609 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1610 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1611 1612 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1613 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1614 if (!mat->diag) { 1615 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1616 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1617 } 1618 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1619 if (hasop) { 1620 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1621 } else { 1622 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1623 } 1624 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1625 1626 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1627 1628 /* local sweep */ 1629 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1630 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1631 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1632 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1633 1634 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1635 1636 matin->factorerrortype = mat->A->factorerrortype; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1641 { 1642 Mat aA,aB,Aperm; 1643 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1644 PetscScalar *aa,*ba; 1645 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1646 PetscSF rowsf,sf; 1647 IS parcolp = NULL; 1648 PetscBool done; 1649 PetscErrorCode ierr; 1650 1651 PetscFunctionBegin; 1652 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1653 ierr = 
ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1654 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1655 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1656 1657 /* Invert row permutation to find out where my rows should go */ 1658 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1659 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1660 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1661 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1662 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1663 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1664 1665 /* Invert column permutation to find out where my columns should go */ 1666 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1667 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1668 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1669 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1670 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1671 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1672 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1673 1674 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1675 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1676 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1677 1678 /* Find out where my gcols should go */ 1679 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1680 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1681 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1682 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1683 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1684 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1685 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1686 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1687 1688 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1689 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1690 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1691 for (i=0; i<m; i++) { 1692 PetscInt row = rdest[i],rowner; 1693 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1694 for (j=ai[i]; j<ai[i+1]; j++) { 1695 PetscInt cowner,col = cdest[aj[j]]; 1696 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1697 if (rowner == cowner) dnnz[i]++; 1698 else onnz[i]++; 1699 } 1700 for (j=bi[i]; j<bi[i+1]; j++) { 1701 PetscInt cowner,col = gcdest[bj[j]]; 1702 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1703 if (rowner == cowner) dnnz[i]++; 1704 else onnz[i]++; 1705 } 1706 } 1707 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1708 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1709 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1710 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1711 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1712 1713 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1714 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1715 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1716 for (i=0; i<m; i++) { 1717 
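/* Stage the permuted global column indices of each row into the length-m work arrays acols/bcols below; rows longer than m are therefore flushed to MatSetValues() in batches of at most m entries */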
PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1718 PetscInt j0,rowlen; 1719 rowlen = ai[i+1] - ai[i]; 1720 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1721 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1722 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1723 } 1724 rowlen = bi[i+1] - bi[i]; 1725 for (j0=j=0; j<rowlen; j0=j) { 1726 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1727 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1728 } 1729 } 1730 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1731 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1732 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1733 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1734 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1735 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1736 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1737 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1738 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1739 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1740 *B = Aperm; 1741 PetscFunctionReturn(0); 1742 } 1743 1744 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1745 { 1746 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1747 PetscErrorCode ierr; 1748 1749 PetscFunctionBegin; 1750 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1751 if (ghosts) *ghosts = aij->garray; 1752 PetscFunctionReturn(0); 1753 } 1754 1755 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1756 { 1757 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1758 Mat A = mat->A,B = mat->B; 1759 PetscErrorCode ierr; 1760 PetscReal isend[5],irecv[5]; 1761 1762 PetscFunctionBegin; 1763 info->block_size = 1.0; 1764 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1765 1766 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1767 isend[3] = info->memory; isend[4] = info->mallocs; 1768 1769 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1770 1771 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1772 isend[3] += info->memory; isend[4] += info->mallocs; 1773 if (flag == MAT_LOCAL) { 1774 info->nz_used = isend[0]; 1775 info->nz_allocated = isend[1]; 1776 info->nz_unneeded = isend[2]; 1777 info->memory = isend[3]; 1778 info->mallocs = isend[4]; 1779 } else if (flag == MAT_GLOBAL_MAX) { 1780 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1781 1782 info->nz_used = irecv[0]; 1783 info->nz_allocated = irecv[1]; 1784 info->nz_unneeded = irecv[2]; 1785 info->memory = irecv[3]; 1786 info->mallocs = irecv[4]; 1787 } else if (flag == MAT_GLOBAL_SUM) { 1788 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1789 1790 info->nz_used = irecv[0]; 1791 info->nz_allocated = irecv[1]; 1792 info->nz_unneeded = irecv[2]; 1793 info->memory = irecv[3]; 1794 info->mallocs = irecv[4]; 1795 } 1796 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1797 info->fill_ratio_needed = 0; 1798 info->factor_mallocs = 0; 1799 PetscFunctionReturn(0); 1800 } 1801 1802 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1803 { 1804 
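/* Most options are forwarded to both sequential blocks a->A and a->B so that the diagonal and off-diagonal parts stay consistent. An illustrative call from user code (sketch; "mat" stands for any assembled MATMPIAIJ matrix):

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   lands in the MAT_IGNORE_OFF_PROC_ENTRIES case below and simply disables the stash */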
Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1805 PetscErrorCode ierr; 1806 1807 PetscFunctionBegin; 1808 switch (op) { 1809 case MAT_NEW_NONZERO_LOCATIONS: 1810 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1811 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1812 case MAT_KEEP_NONZERO_PATTERN: 1813 case MAT_NEW_NONZERO_LOCATION_ERR: 1814 case MAT_USE_INODES: 1815 case MAT_IGNORE_ZERO_ENTRIES: 1816 MatCheckPreallocated(A,1); 1817 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1818 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1819 break; 1820 case MAT_ROW_ORIENTED: 1821 MatCheckPreallocated(A,1); 1822 a->roworiented = flg; 1823 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_NEW_DIAGONALS: 1828 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1829 break; 1830 case MAT_IGNORE_OFF_PROC_ENTRIES: 1831 a->donotstash = flg; 1832 break; 1833 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1834 case MAT_SPD: 1835 case MAT_SYMMETRIC: 1836 case MAT_STRUCTURALLY_SYMMETRIC: 1837 case MAT_HERMITIAN: 1838 case MAT_SYMMETRY_ETERNAL: 1839 break; 1840 case MAT_SUBMAT_SINGLEIS: 1841 A->submat_singleis = flg; 1842 break; 1843 case MAT_STRUCTURE_ONLY: 1844 /* The option is handled directly by MatSetOption() */ 1845 break; 1846 default: 1847 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1848 } 1849 PetscFunctionReturn(0); 1850 } 1851 1852 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1853 { 1854 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1855 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1856 PetscErrorCode ierr; 1857 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1858 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1859 PetscInt *cmap,*idx_p; 1860 1861 PetscFunctionBegin; 1862 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1863 mat->getrowactive = PETSC_TRUE; 1864 1865 if (!mat->rowvalues && (idx || v)) { 1866 /* 1867 allocate enough space to hold information from the longest row. 
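       A single pass over both sequential blocks below finds the largest combined row length, which bounds the size of any row later handed out by MatGetRow().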
1868 */ 1869 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1870 PetscInt max = 1,tmp; 1871 for (i=0; i<matin->rmap->n; i++) { 1872 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1873 if (max < tmp) max = tmp; 1874 } 1875 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1876 } 1877 1878 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1879 lrow = row - rstart; 1880 1881 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1882 if (!v) {pvA = 0; pvB = 0;} 1883 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1884 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1885 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1886 nztot = nzA + nzB; 1887 1888 cmap = mat->garray; 1889 if (v || idx) { 1890 if (nztot) { 1891 /* Sort by increasing column numbers, assuming A and B already sorted */ 1892 PetscInt imark = -1; 1893 if (v) { 1894 *v = v_p = mat->rowvalues; 1895 for (i=0; i<nzB; i++) { 1896 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1897 else break; 1898 } 1899 imark = i; 1900 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1901 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1902 } 1903 if (idx) { 1904 *idx = idx_p = mat->rowindices; 1905 if (imark > -1) { 1906 for (i=0; i<imark; i++) { 1907 idx_p[i] = cmap[cworkB[i]]; 1908 } 1909 } else { 1910 for (i=0; i<nzB; i++) { 1911 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1912 else break; 1913 } 1914 imark = i; 1915 } 1916 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1917 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1918 } 1919 } else { 1920 if (idx) *idx = 0; 1921 if (v) *v = 0; 1922 } 1923 } 1924 *nz = nztot; 1925 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1926 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1931 { 1932 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1933 1934 PetscFunctionBegin; 1935 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1936 aij->getrowactive = PETSC_FALSE; 1937 PetscFunctionReturn(0); 1938 } 1939 1940 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1941 { 1942 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1943 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1944 PetscErrorCode ierr; 1945 PetscInt i,j,cstart = mat->cmap->rstart; 1946 PetscReal sum = 0.0; 1947 MatScalar *v; 1948 1949 PetscFunctionBegin; 1950 if (aij->size == 1) { 1951 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1952 } else { 1953 if (type == NORM_FROBENIUS) { 1954 v = amat->a; 1955 for (i=0; i<amat->nz; i++) { 1956 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1957 } 1958 v = bmat->a; 1959 for (i=0; i<bmat->nz; i++) { 1960 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1961 } 1962 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1963 *norm = PetscSqrtReal(*norm); 1964 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1965 } else if (type == NORM_1) { /* max column norm */ 1966 PetscReal *tmp,*tmp2; 1967 PetscInt *jj,*garray = aij->garray; 1968 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1969 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1970 *norm = 0.0; 
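/* NORM_1 is the maximum absolute column sum: accumulate |a_ij| from both blocks into a global-length array indexed by global column (the diagonal block shifted by cstart, the off-diagonal block mapped through garray), sum the arrays across processes, then take the largest entry */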
1971 v = amat->a; jj = amat->j; 1972 for (j=0; j<amat->nz; j++) { 1973 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1974 } 1975 v = bmat->a; jj = bmat->j; 1976 for (j=0; j<bmat->nz; j++) { 1977 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1978 } 1979 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1980 for (j=0; j<mat->cmap->N; j++) { 1981 if (tmp2[j] > *norm) *norm = tmp2[j]; 1982 } 1983 ierr = PetscFree(tmp);CHKERRQ(ierr); 1984 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1985 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1986 } else if (type == NORM_INFINITY) { /* max row norm */ 1987 PetscReal ntemp = 0.0; 1988 for (j=0; j<aij->A->rmap->n; j++) { 1989 v = amat->a + amat->i[j]; 1990 sum = 0.0; 1991 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1992 sum += PetscAbsScalar(*v); v++; 1993 } 1994 v = bmat->a + bmat->i[j]; 1995 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1996 sum += PetscAbsScalar(*v); v++; 1997 } 1998 if (sum > ntemp) ntemp = sum; 1999 } 2000 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2001 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2002 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2003 } 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2008 { 2009 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2010 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2011 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,*B_diag_ilen,*B_diag_i,i,ncol,A_diag_ncol; 2012 PetscErrorCode ierr; 2013 Mat B,A_diag,*B_diag; 2014 MatScalar *array; 2015 2016 PetscFunctionBegin; 2017 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2018 ai = Aloc->i; aj = Aloc->j; 2019 bi = Bloc->i; bj = Bloc->j; 2020 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2021 PetscInt *d_nnz,*g_nnz,*o_nnz; 2022 PetscSFNode *oloc; 2023 PETSC_UNUSED PetscSF sf; 2024 2025 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2026 /* compute d_nnz for preallocation */ 2027 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2028 for (i=0; i<ai[ma]; i++) { 2029 d_nnz[aj[i]]++; 2030 } 2031 /* compute local off-diagonal contributions */ 2032 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 2033 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2034 /* map those to global */ 2035 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2036 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2037 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2038 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 2039 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2040 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2041 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2042 2043 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2044 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2045 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2046 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2047 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2048 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2049 } else { 2050 B = *matout; 2051 
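/* reusing *matout: its nonzero pattern is fixed, so make any attempt to introduce a new nonzero an error rather than silently growing the pattern */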
ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2052 } 2053 2054 b = (Mat_MPIAIJ*)B->data; 2055 A_diag = a->A; 2056 B_diag = &b->A; 2057 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2058 A_diag_ncol = A_diag->cmap->N; 2059 B_diag_ilen = sub_B_diag->ilen; 2060 B_diag_i = sub_B_diag->i; 2061 2062 /* Set ilen for diagonal of B */ 2063 for (i=0; i<A_diag_ncol; i++) { 2064 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2065 } 2066 2067 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 2068 very quickly (i.e., without using MatSetValues()), because all writes are local. */ 2069 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2070 2071 /* copy over the B part */ 2072 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2073 array = Bloc->a; 2074 row = A->rmap->rstart; 2075 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2076 cols_tmp = cols; 2077 for (i=0; i<mb; i++) { 2078 ncol = bi[i+1]-bi[i]; 2079 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2080 row++; 2081 array += ncol; cols_tmp += ncol; 2082 } 2083 ierr = PetscFree(cols);CHKERRQ(ierr); 2084 2085 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2086 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2087 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2088 *matout = B; 2089 } else { 2090 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2096 { 2097 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2098 Mat a = aij->A,b = aij->B; 2099 PetscErrorCode ierr; 2100 PetscInt s1,s2,s3; 2101 2102 PetscFunctionBegin; 2103 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2104 if (rr) { 2105 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2106 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2107 /* Overlap communication with computation.
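       The forward scatter of rr into lvec is started here and completed only after the left scaling of the off-diagonal block and the scaling of the diagonal block, hiding the communication behind that local work.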
*/ 2108 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2109 } 2110 if (ll) { 2111 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2112 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2113 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2114 } 2115 /* scale the diagonal block */ 2116 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2117 2118 if (rr) { 2119 /* Do a scatter end and then right scale the off-diagonal block */ 2120 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2121 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2122 } 2123 PetscFunctionReturn(0); 2124 } 2125 2126 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2127 { 2128 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2129 PetscErrorCode ierr; 2130 2131 PetscFunctionBegin; 2132 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2137 { 2138 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2139 Mat a,b,c,d; 2140 PetscBool flg; 2141 PetscErrorCode ierr; 2142 2143 PetscFunctionBegin; 2144 a = matA->A; b = matA->B; 2145 c = matB->A; d = matB->B; 2146 2147 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2148 if (flg) { 2149 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2150 } 2151 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2152 PetscFunctionReturn(0); 2153 } 2154 2155 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2156 { 2157 PetscErrorCode ierr; 2158 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2159 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2160 2161 PetscFunctionBegin; 2162 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2163 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2164 /* because of the column compression in the off-processor part of the matrix a->B, 2165 the number of columns in a->B and b->B may be different, hence we cannot call 2166 the MatCopy() directly on the two parts. If need be, we can provide a more 2167 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2168 then copying the submatrices */ 2169 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2170 } else { 2171 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2172 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2173 } 2174 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2175 PetscFunctionReturn(0); 2176 } 2177 2178 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2179 { 2180 PetscErrorCode ierr; 2181 2182 PetscFunctionBegin; 2183 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 /* 2188 Computes the number of nonzeros per row needed for preallocation when X and Y 2189 have different nonzero structure. 
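   Each row count is the size of the union of the column sets of X and Y, obtained by a two-pointer merge over their (sorted) global column indices; for example, a row with columns {0,3,7} in X and {3,5} in Y yields nnz[i] = 4.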
2190 */ 2191 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2192 { 2193 PetscInt i,j,k,nzx,nzy; 2194 2195 PetscFunctionBegin; 2196 /* Set the number of nonzeros in the new matrix */ 2197 for (i=0; i<m; i++) { 2198 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2199 nzx = xi[i+1] - xi[i]; 2200 nzy = yi[i+1] - yi[i]; 2201 nnz[i] = 0; 2202 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2203 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2204 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2205 nnz[i]++; 2206 } 2207 for (; k<nzy; k++) nnz[i]++; 2208 } 2209 PetscFunctionReturn(0); 2210 } 2211 2212 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2213 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2214 { 2215 PetscErrorCode ierr; 2216 PetscInt m = Y->rmap->N; 2217 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2218 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2219 2220 PetscFunctionBegin; 2221 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2222 PetscFunctionReturn(0); 2223 } 2224 2225 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2226 { 2227 PetscErrorCode ierr; 2228 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2229 PetscBLASInt bnz,one=1; 2230 Mat_SeqAIJ *x,*y; 2231 2232 PetscFunctionBegin; 2233 if (str == SAME_NONZERO_PATTERN) { 2234 PetscScalar alpha = a; 2235 x = (Mat_SeqAIJ*)xx->A->data; 2236 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2237 y = (Mat_SeqAIJ*)yy->A->data; 2238 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2239 x = (Mat_SeqAIJ*)xx->B->data; 2240 y = (Mat_SeqAIJ*)yy->B->data; 2241 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2242 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2243 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2244 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2245 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2246 } else { 2247 Mat B; 2248 PetscInt *nnz_d,*nnz_o; 2249 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2250 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2251 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2252 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2253 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2254 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2255 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2256 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2257 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2258 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2259 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2260 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2261 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2262 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2263 } 2264 PetscFunctionReturn(0); 2265 } 2266 2267 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2268 2269 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2270 { 2271 #if defined(PETSC_USE_COMPLEX) 2272 PetscErrorCode ierr; 2273 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2274 2275 PetscFunctionBegin; 2276 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2277 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2278 #else 2279 PetscFunctionBegin; 2280 #endif 2281 PetscFunctionReturn(0); 2282 } 2283 2284 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2285 { 2286 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2291 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2292 PetscFunctionReturn(0); 2293 } 2294 2295 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2296 { 2297 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2298 PetscErrorCode ierr; 2299 2300 PetscFunctionBegin; 2301 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2302 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2307 { 2308 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2309 PetscErrorCode ierr; 2310 PetscInt i,*idxb = 0; 2311 PetscScalar *va,*vb; 2312 Vec vtmp; 2313 2314 PetscFunctionBegin; 2315 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2316 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2317 if (idx) { 2318 for (i=0; i<A->rmap->n; i++) { 2319 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2320 } 2321 } 2322 2323 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2324 if (idx) { 2325 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2326 } 2327 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2328 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2329 2330 for (i=0; i<A->rmap->n; i++) { 2331 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2332 va[i] = vb[i]; 2333 if (idx) idx[i] = a->garray[idxb[i]]; 2334 } 2335 } 2336 2337 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2338 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2339 ierr = PetscFree(idxb);CHKERRQ(ierr); 2340 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2341 PetscFunctionReturn(0); 2342 } 2343 2344 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2345 { 2346 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2347 PetscErrorCode ierr; 2348 PetscInt i,*idxb = 0; 2349 PetscScalar *va,*vb; 2350 Vec vtmp; 2351 2352 PetscFunctionBegin; 2353 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2354 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2355 if (idx) { 2356 for (i=0; i<A->rmap->n; i++) { 2357 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2358 } 2359 } 2360 2361 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2362 if (idx) { 2363 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2364 } 2365 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2366 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2367 2368 for (i=0; i<A->rmap->n; i++) { 2369 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2370 va[i] = vb[i]; 2371 if (idx) idx[i] = a->garray[idxb[i]]; 2372 } 2373 } 2374 2375 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2376 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2377 ierr = PetscFree(idxb);CHKERRQ(ierr); 2378 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2383 { 2384 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2385 PetscInt n = A->rmap->n; 2386 PetscInt cstart = A->cmap->rstart; 2387 PetscInt *cmap = mat->garray; 2388 PetscInt *diagIdx, *offdiagIdx; 2389 Vec diagV, offdiagV; 2390 PetscScalar *a, *diagA, *offdiagA; 2391 PetscInt r; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395
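/* Compute the row minima of the diagonal and off-diagonal blocks independently, then keep the winner of the two for each row, translating off-diagonal positions to global column numbers through garray */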
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2396 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2397 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2398 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2399 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2400 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2401 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2402 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2403 for (r = 0; r < n; ++r) { 2404 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2405 a[r] = diagA[r]; 2406 idx[r] = cstart + diagIdx[r]; 2407 } else { 2408 a[r] = offdiagA[r]; 2409 idx[r] = cmap[offdiagIdx[r]]; 2410 } 2411 } 2412 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2413 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2414 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2415 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2416 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2417 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2422 { 2423 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2424 PetscInt n = A->rmap->n; 2425 PetscInt cstart = A->cmap->rstart; 2426 PetscInt *cmap = mat->garray; 2427 PetscInt *diagIdx, *offdiagIdx; 2428 Vec diagV, offdiagV; 2429 PetscScalar *a, *diagA, *offdiagA; 2430 PetscInt r; 2431 PetscErrorCode ierr; 2432 2433 PetscFunctionBegin; 2434 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2435 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2436 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2437 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2438 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2439 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2440 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2441 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2442 for (r = 0; r < n; ++r) { 2443 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2444 a[r] = diagA[r]; 2445 idx[r] = cstart + diagIdx[r]; 2446 } else { 2447 a[r] = offdiagA[r]; 2448 idx[r] = cmap[offdiagIdx[r]]; 2449 } 2450 } 2451 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2452 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2453 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2454 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2455 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2456 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2457 PetscFunctionReturn(0); 2458 } 2459 2460 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2461 { 2462 PetscErrorCode ierr; 2463 Mat *dummy; 2464 2465 PetscFunctionBegin; 2466 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2467 *newmat = *dummy; 2468 ierr = PetscFree(dummy);CHKERRQ(ierr); 2469 PetscFunctionReturn(0); 2470 } 2471 2472 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2473 { 2474 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2479 A->factorerrortype = a->A->factorerrortype; 2480 PetscFunctionReturn(0); 2481 } 2482 2483 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2484 { 2485 PetscErrorCode ierr; 2486 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2487 2488 PetscFunctionBegin; 2489 if
(!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2490 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2491 if (x->assembled) { 2492 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2493 } else { 2494 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2495 } 2496 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2497 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2502 { 2503 PetscFunctionBegin; 2504 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2505 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2506 PetscFunctionReturn(0); 2507 } 2508 2509 /*@ 2510 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2511 2512 Collective on Mat 2513 2514 Input Parameters: 2515 + A - the matrix 2516 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2517 2518 Level: advanced 2519 2520 @*/ 2521 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2522 { 2523 PetscErrorCode ierr; 2524 2525 PetscFunctionBegin; 2526 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2531 { 2532 PetscErrorCode ierr; 2533 PetscBool sc = PETSC_FALSE,flg; 2534 2535 PetscFunctionBegin; 2536 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2537 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2538 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2539 if (flg) { 2540 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2541 } 2542 ierr = PetscOptionsTail();CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2547 { 2548 PetscErrorCode ierr; 2549 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2550 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2551 2552 PetscFunctionBegin; 2553 if (!Y->preallocated) { 2554 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2555 } else if (!aij->nz) { 2556 PetscInt nonew = aij->nonew; 2557 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2558 aij->nonew = nonew; 2559 } 2560 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2561 PetscFunctionReturn(0); 2562 } 2563 2564 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2565 { 2566 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2567 PetscErrorCode ierr; 2568 2569 PetscFunctionBegin; 2570 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2571 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2572 if (d) { 2573 PetscInt rstart; 2574 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2575 *d += rstart; 2576 2577 } 2578 PetscFunctionReturn(0); 2579 } 2580 2581 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2582 { 2583 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2584 
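/* the block diagonal lies entirely within the diagonal part a->A, so the inversion below is purely local */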
PetscErrorCode ierr; 2585 2586 PetscFunctionBegin; 2587 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2588 PetscFunctionReturn(0); 2589 } 2590 2591 /* -------------------------------------------------------------------*/ 2592 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2593 MatGetRow_MPIAIJ, 2594 MatRestoreRow_MPIAIJ, 2595 MatMult_MPIAIJ, 2596 /* 4*/ MatMultAdd_MPIAIJ, 2597 MatMultTranspose_MPIAIJ, 2598 MatMultTransposeAdd_MPIAIJ, 2599 0, 2600 0, 2601 0, 2602 /*10*/ 0, 2603 0, 2604 0, 2605 MatSOR_MPIAIJ, 2606 MatTranspose_MPIAIJ, 2607 /*15*/ MatGetInfo_MPIAIJ, 2608 MatEqual_MPIAIJ, 2609 MatGetDiagonal_MPIAIJ, 2610 MatDiagonalScale_MPIAIJ, 2611 MatNorm_MPIAIJ, 2612 /*20*/ MatAssemblyBegin_MPIAIJ, 2613 MatAssemblyEnd_MPIAIJ, 2614 MatSetOption_MPIAIJ, 2615 MatZeroEntries_MPIAIJ, 2616 /*24*/ MatZeroRows_MPIAIJ, 2617 0, 2618 0, 2619 0, 2620 0, 2621 /*29*/ MatSetUp_MPIAIJ, 2622 0, 2623 0, 2624 MatGetDiagonalBlock_MPIAIJ, 2625 0, 2626 /*34*/ MatDuplicate_MPIAIJ, 2627 0, 2628 0, 2629 0, 2630 0, 2631 /*39*/ MatAXPY_MPIAIJ, 2632 MatCreateSubMatrices_MPIAIJ, 2633 MatIncreaseOverlap_MPIAIJ, 2634 MatGetValues_MPIAIJ, 2635 MatCopy_MPIAIJ, 2636 /*44*/ MatGetRowMax_MPIAIJ, 2637 MatScale_MPIAIJ, 2638 MatShift_MPIAIJ, 2639 MatDiagonalSet_MPIAIJ, 2640 MatZeroRowsColumns_MPIAIJ, 2641 /*49*/ MatSetRandom_MPIAIJ, 2642 0, 2643 0, 2644 0, 2645 0, 2646 /*54*/ MatFDColoringCreate_MPIXAIJ, 2647 0, 2648 MatSetUnfactored_MPIAIJ, 2649 MatPermute_MPIAIJ, 2650 0, 2651 /*59*/ MatCreateSubMatrix_MPIAIJ, 2652 MatDestroy_MPIAIJ, 2653 MatView_MPIAIJ, 2654 0, 2655 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2656 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2657 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2658 0, 2659 0, 2660 0, 2661 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2662 MatGetRowMinAbs_MPIAIJ, 2663 0, 2664 0, 2665 0, 2666 0, 2667 /*75*/ MatFDColoringApply_AIJ, 2668 MatSetFromOptions_MPIAIJ, 2669 0, 2670 0, 2671 MatFindZeroDiagonals_MPIAIJ, 2672 /*80*/ 0, 2673 0, 2674 0, 2675 /*83*/ MatLoad_MPIAIJ, 2676 MatIsSymmetric_MPIAIJ, 2677 0, 2678 0, 2679 0, 2680 0, 2681 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2682 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2683 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2684 MatPtAP_MPIAIJ_MPIAIJ, 2685 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2686 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2687 0, 2688 0, 2689 0, 2690 0, 2691 /*99*/ 0, 2692 0, 2693 0, 2694 MatConjugate_MPIAIJ, 2695 0, 2696 /*104*/MatSetValuesRow_MPIAIJ, 2697 MatRealPart_MPIAIJ, 2698 MatImaginaryPart_MPIAIJ, 2699 0, 2700 0, 2701 /*109*/0, 2702 0, 2703 MatGetRowMin_MPIAIJ, 2704 0, 2705 MatMissingDiagonal_MPIAIJ, 2706 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2707 0, 2708 MatGetGhosts_MPIAIJ, 2709 0, 2710 0, 2711 /*119*/0, 2712 0, 2713 0, 2714 0, 2715 MatGetMultiProcBlock_MPIAIJ, 2716 /*124*/MatFindNonzeroRows_MPIAIJ, 2717 MatGetColumnNorms_MPIAIJ, 2718 MatInvertBlockDiagonal_MPIAIJ, 2719 MatInvertVariableBlockDiagonal_MPIAIJ, 2720 MatCreateSubMatricesMPI_MPIAIJ, 2721 /*129*/0, 2722 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2723 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2724 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2725 0, 2726 /*134*/0, 2727 0, 2728 MatRARt_MPIAIJ_MPIAIJ, 2729 0, 2730 0, 2731 /*139*/MatSetBlockSizes_MPIAIJ, 2732 0, 2733 0, 2734 MatFDColoringSetUp_MPIXAIJ, 2735 MatFindOffBlockDiagonalEntries_MPIAIJ, 2736 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2737 }; 2738 2739 /* ----------------------------------------------------------------------------------------*/ 2740 2741 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2742 { 
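/* Stash a copy of the numerical values of both sequential blocks so MatRetrieveValues() can bring them back. A sketch of the intended calling pattern on a matrix mat whose nonzero structure is frozen:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... change values with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
 */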
2743 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2744 PetscErrorCode ierr; 2745 2746 PetscFunctionBegin; 2747 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2748 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2749 PetscFunctionReturn(0); 2750 } 2751 2752 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2753 { 2754 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2755 PetscErrorCode ierr; 2756 2757 PetscFunctionBegin; 2758 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2759 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2760 PetscFunctionReturn(0); 2761 } 2762 2763 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2764 { 2765 Mat_MPIAIJ *b; 2766 PetscErrorCode ierr; 2767 PetscMPIInt size; 2768 2769 PetscFunctionBegin; 2770 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2771 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2772 b = (Mat_MPIAIJ*)B->data; 2773 2774 #if defined(PETSC_USE_CTABLE) 2775 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2776 #else 2777 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2778 #endif 2779 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2780 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2781 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2782 2783 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2784 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2785 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2786 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2787 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr); 2788 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2789 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2790 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2791 2792 if (!B->preallocated) { 2793 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2794 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2795 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2796 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2797 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2798 } 2799 2800 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2801 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2802 B->preallocated = PETSC_TRUE; 2803 B->was_assembled = PETSC_FALSE; 2804 B->assembled = PETSC_FALSE; 2805 PetscFunctionReturn(0); 2806 } 2807 2808 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2809 { 2810 Mat_MPIAIJ *b; 2811 PetscErrorCode ierr; 2812 2813 PetscFunctionBegin; 2814 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2815 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2816 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2817 b = (Mat_MPIAIJ*)B->data; 2818 2819 #if defined(PETSC_USE_CTABLE) 2820 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2821 #else 2822 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2823 #endif 2824 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2825 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2826 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2827 2828 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2829 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2830 B->preallocated = PETSC_TRUE; 2831 B->was_assembled = PETSC_FALSE; 2832 B->assembled = PETSC_FALSE; 2833 PetscFunctionReturn(0); 2834 } 2835 2836 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2837 { 2838 Mat mat; 
2839 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2840 PetscErrorCode ierr; 2841 2842 PetscFunctionBegin; 2843 *newmat = 0; 2844 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2845 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2846 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2847 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2848 a = (Mat_MPIAIJ*)mat->data; 2849 2850 mat->factortype = matin->factortype; 2851 mat->assembled = PETSC_TRUE; 2852 mat->insertmode = NOT_SET_VALUES; 2853 mat->preallocated = PETSC_TRUE; 2854 2855 a->size = oldmat->size; 2856 a->rank = oldmat->rank; 2857 a->donotstash = oldmat->donotstash; 2858 a->roworiented = oldmat->roworiented; 2859 a->rowindices = 0; 2860 a->rowvalues = 0; 2861 a->getrowactive = PETSC_FALSE; 2862 2863 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2864 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2865 2866 if (oldmat->colmap) { 2867 #if defined(PETSC_USE_CTABLE) 2868 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2869 #else 2870 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2871 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2872 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2873 #endif 2874 } else a->colmap = 0; 2875 if (oldmat->garray) { 2876 PetscInt len; 2877 len = oldmat->B->cmap->n; 2878 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2879 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2880 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2881 } else a->garray = 0; 2882 2883 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2884 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2885 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2886 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2887 2888 if (oldmat->Mvctx_mpi1) { 2889 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2890 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2891 } 2892 2893 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2894 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2895 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2896 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2897 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2898 *newmat = mat; 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2903 { 2904 PetscBool isbinary, ishdf5; 2905 PetscErrorCode ierr; 2906 2907 PetscFunctionBegin; 2908 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2909 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2910 /* force binary viewer to load .info file if it has not yet done so */ 2911 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2912 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2913 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2914 if (isbinary) { 2915 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2916 } else if (ishdf5) { 2917 #if 
defined(PETSC_HAVE_HDF5) 2918 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2919 #else 2920 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2921 #endif 2922 } else { 2923 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2924 } 2925 PetscFunctionReturn(0); 2926 } 2927 2928 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2929 { 2930 PetscScalar *vals,*svals; 2931 MPI_Comm comm; 2932 PetscErrorCode ierr; 2933 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2934 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2935 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2936 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2937 PetscInt cend,cstart,n,*rowners; 2938 int fd; 2939 PetscInt bs = newMat->rmap->bs; 2940 2941 PetscFunctionBegin; 2942 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2943 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2944 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2945 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2946 if (!rank) { 2947 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2948 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object"); 2949 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MATMPIAIJ"); 2950 } 2951 2952 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2953 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2954 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2955 if (bs < 0) bs = 1; 2956 2957 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2958 M = header[1]; N = header[2]; 2959 2960 /* If global sizes are set, check that they are consistent with those given in the file */ 2961 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2962 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2963 2964 /* determine ownership of all (block) rows */ 2965 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%D) and block size (%D)",M,bs); 2966 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2967 else m = newMat->rmap->n; /* Set by user */ 2968 2969 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2970 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2971 2972 /* First process needs enough room for process with most rows */ 2973 if (!rank) { 2974 mmax = rowners[1]; 2975 for (i=2; i<=size; i++) { 2976 mmax = PetscMax(mmax, rowners[i]); 2977 } 2978 } else mmax = -1; /* unused, but compilers complain */ 2979 2980 rowners[0] = 0; 2981 for (i=2; i<=size; i++) { 2982 rowners[i] += rowners[i-1]; 2983 } 2984 rstart = rowners[rank]; 2985 rend = rowners[rank+1]; 2986 2987 /* distribute row lengths to all processors */ 2988 ierr =
PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2989 if (!rank) { 2990 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 2991 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2992 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2993 for (j=0; j<m; j++) { 2994 procsnz[0] += ourlens[j]; 2995 } 2996 for (i=1; i<size; i++) { 2997 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 2998 /* calculate the number of nonzeros on each processor */ 2999 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3000 procsnz[i] += rowlengths[j]; 3001 } 3002 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3003 } 3004 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3005 } else { 3006 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3007 } 3008 3009 if (!rank) { 3010 /* determine max buffer needed and allocate it */ 3011 maxnz = 0; 3012 for (i=0; i<size; i++) { 3013 maxnz = PetscMax(maxnz,procsnz[i]); 3014 } 3015 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3016 3017 /* read in my part of the matrix column indices */ 3018 nz = procsnz[0]; 3019 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3020 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3021 3022 /* read in every one elses and ship off */ 3023 for (i=1; i<size; i++) { 3024 nz = procsnz[i]; 3025 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3026 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3027 } 3028 ierr = PetscFree(cols);CHKERRQ(ierr); 3029 } else { 3030 /* determine buffer space needed for message */ 3031 nz = 0; 3032 for (i=0; i<m; i++) { 3033 nz += ourlens[i]; 3034 } 3035 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3036 3037 /* receive message of column indices*/ 3038 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3039 } 3040 3041 /* determine column ownership if matrix is not square */ 3042 if (N != M) { 3043 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3044 else n = newMat->cmap->n; 3045 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3046 cstart = cend - n; 3047 } else { 3048 cstart = rstart; 3049 cend = rend; 3050 n = cend - cstart; 3051 } 3052 3053 /* loop over local rows, determining number of off diagonal entries */ 3054 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 3055 jj = 0; 3056 for (i=0; i<m; i++) { 3057 for (j=0; j<ourlens[i]; j++) { 3058 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3059 jj++; 3060 } 3061 } 3062 3063 for (i=0; i<m; i++) { 3064 ourlens[i] -= offlens[i]; 3065 } 3066 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3067 3068 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3069 3070 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3071 3072 for (i=0; i<m; i++) { 3073 ourlens[i] += offlens[i]; 3074 } 3075 3076 if (!rank) { 3077 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3078 3079 /* read in my part of the matrix numerical values */ 3080 nz = procsnz[0]; 3081 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3082 3083 /* insert into matrix */ 3084 jj = rstart; 3085 smycols = mycols; 3086 svals = vals; 3087 for (i=0; i<m; i++) { 3088 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3089 smycols += ourlens[i]; 3090 svals += ourlens[i]; 3091 jj++; 3092 } 3093 3094 /* read in other processors and ship out */ 3095 for (i=1; i<size; i++) { 3096 nz = procsnz[i]; 
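/* root reads process i's slice of numerical values and ships it off; mirrors the column-index distribution above */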
3097 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3098 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3099 } 3100 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3101 } else { 3102 /* receive numeric values */ 3103 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3104 3105 /* receive message of values*/ 3106 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3107 3108 /* insert into matrix */ 3109 jj = rstart; 3110 smycols = mycols; 3111 svals = vals; 3112 for (i=0; i<m; i++) { 3113 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3114 smycols += ourlens[i]; 3115 svals += ourlens[i]; 3116 jj++; 3117 } 3118 } 3119 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3120 ierr = PetscFree(vals);CHKERRQ(ierr); 3121 ierr = PetscFree(mycols);CHKERRQ(ierr); 3122 ierr = PetscFree(rowners);CHKERRQ(ierr); 3123 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3124 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3125 PetscFunctionReturn(0); 3126 } 3127 3128 /* Not scalable because of ISAllGather() unless getting all columns. */ 3129 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3130 { 3131 PetscErrorCode ierr; 3132 IS iscol_local; 3133 PetscBool isstride; 3134 PetscMPIInt lisstride=0,gisstride; 3135 3136 PetscFunctionBegin; 3137 /* check if we are grabbing all columns*/ 3138 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3139 3140 if (isstride) { 3141 PetscInt start,len,mstart,mlen; 3142 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3143 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3144 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3145 if (mstart == start && mlen-mstart == len) lisstride = 1; 3146 } 3147 3148 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3149 if (gisstride) { 3150 PetscInt N; 3151 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3152 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3153 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3154 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3155 } else { 3156 PetscInt cbs; 3157 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3158 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3159 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3160 } 3161 3162 *isseq = iscol_local; 3163 PetscFunctionReturn(0); 3164 } 3165 3166 /* 3167 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3168 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3169 3170 Input Parameters: 3171 mat - matrix 3172 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3173 i.e., mat->rstart <= isrow[i] < mat->rend 3174 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3175 i.e., mat->cstart <= iscol[i] < mat->cend 3176 Output Parameter: 3177 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3178 iscol_o - sequential column index set for retrieving mat->B 3179 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3180 */ 3181 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3182 { 
3183 PetscErrorCode ierr; 3184 Vec x,cmap; 3185 const PetscInt *is_idx; 3186 PetscScalar *xarray,*cmaparray; 3187 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3188 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3189 Mat B=a->B; 3190 Vec lvec=a->lvec,lcmap; 3191 PetscInt i,cstart,cend,Bn=B->cmap->N; 3192 MPI_Comm comm; 3193 VecScatter Mvctx=a->Mvctx; 3194 3195 PetscFunctionBegin; 3196 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3197 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3198 3199 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3200 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3201 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3202 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3203 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3204 3205 /* Get start indices */ 3206 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3207 isstart -= ncols; 3208 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3209 3210 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3211 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3212 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3213 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3214 for (i=0; i<ncols; i++) { 3215 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3216 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3217 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3218 } 3219 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3220 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3221 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3222 3223 /* Get iscol_d */ 3224 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3225 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3226 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3227 3228 /* Get isrow_d */ 3229 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3230 rstart = mat->rmap->rstart; 3231 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3232 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3233 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3234 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3235 3236 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3237 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3238 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3239 3240 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3241 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3242 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3243 3244 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3245 3246 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3247 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3248 3249 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3250 /* off-process column indices */ 3251 count = 0; 3252 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3253 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3254 3255 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3256 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3257 for (i=0; i<Bn; i++) { 3258 if (PetscRealPart(xarray[i]) > -1.0) { 3259 idx[count] = i; /* local column index in off-diagonal part B */ 3260 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3261 count++; 3262 } 3263 } 3264 ierr = 
VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3265 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3266 3267 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3268 /* cannot ensure iscol_o has same blocksize as iscol! */ 3269 3270 ierr = PetscFree(idx);CHKERRQ(ierr); 3271 *garray = cmap1; 3272 3273 ierr = VecDestroy(&x);CHKERRQ(ierr); 3274 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3275 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3276 PetscFunctionReturn(0); 3277 } 3278 3279 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3280 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3281 { 3282 PetscErrorCode ierr; 3283 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3284 Mat M = NULL; 3285 MPI_Comm comm; 3286 IS iscol_d,isrow_d,iscol_o; 3287 Mat Asub = NULL,Bsub = NULL; 3288 PetscInt n; 3289 3290 PetscFunctionBegin; 3291 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3292 3293 if (call == MAT_REUSE_MATRIX) { 3294 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3295 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3296 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3297 3298 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3299 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3300 3301 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3302 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3303 3304 /* Update diagonal and off-diagonal portions of submat */ 3305 asub = (Mat_MPIAIJ*)(*submat)->data; 3306 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3307 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3308 if (n) { 3309 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3310 } 3311 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3312 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3313 3314 } else { /* call == MAT_INITIAL_MATRIX) */ 3315 const PetscInt *garray; 3316 PetscInt BsubN; 3317 3318 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3319 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3320 3321 /* Create local submatrices Asub and Bsub */ 3322 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3323 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3324 3325 /* Create submatrix M */ 3326 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3327 3328 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3329 asub = (Mat_MPIAIJ*)M->data; 3330 3331 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3332 n = asub->B->cmap->N; 3333 if (BsubN > n) { 3334 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3335 const PetscInt *idx; 3336 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3337 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3338 3339 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3340 j = 0; 3341 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3342 for (i=0; i<n; i++) { 3343 if (j >= BsubN) break; 3344 while (subgarray[i] > garray[j]) j++; 3345 3346 if (subgarray[i] == garray[j]) { 3347 idx_new[i] = idx[j++]; 3348 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]); 3349 } 3350 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3351 3352 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3353 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3354 3355 } else if (BsubN < n) { 3356 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub %D cannot be fewer than columns of B %D",BsubN,asub->B->cmap->N); 3357 } 3358 3359 ierr = PetscFree(garray);CHKERRQ(ierr); 3360 *submat = M; 3361 3362 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3363 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3364 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3365 3366 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3367 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3368 3369 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3370 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3371 } 3372 PetscFunctionReturn(0); 3373 } 3374 3375 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3376 { 3377 PetscErrorCode ierr; 3378 IS iscol_local=NULL,isrow_d; 3379 PetscInt csize; 3380 PetscInt n,i,j,start,end; 3381 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3382 MPI_Comm comm; 3383 3384 PetscFunctionBegin; 3385 /* If isrow has same processor distribution as mat, 3386 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3387 if (call == MAT_REUSE_MATRIX) { 3388 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3389 if (isrow_d) { 3390 sameRowDist = PETSC_TRUE; 3391 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3392 } else { 3393 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3394 if (iscol_local) { 3395 sameRowDist = PETSC_TRUE; 3396 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3397 } 3398 } 3399 } else { 3400 /* Check if isrow has same processor distribution as mat */ 3401 sameDist[0]
= PETSC_FALSE; 3402 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3403 if (!n) { 3404 sameDist[0] = PETSC_TRUE; 3405 } else { 3406 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3407 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3408 if (i >= start && j < end) { 3409 sameDist[0] = PETSC_TRUE; 3410 } 3411 } 3412 3413 /* Check if iscol has same processor distribution as mat */ 3414 sameDist[1] = PETSC_FALSE; 3415 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3416 if (!n) { 3417 sameDist[1] = PETSC_TRUE; 3418 } else { 3419 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3420 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3421 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3422 } 3423 3424 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3425 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3426 sameRowDist = tsameDist[0]; 3427 } 3428 3429 if (sameRowDist) { 3430 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3431 /* isrow and iscol have same processor distribution as mat */ 3432 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3433 PetscFunctionReturn(0); 3434 } else { /* sameRowDist */ 3435 /* isrow has same processor distribution as mat */ 3436 if (call == MAT_INITIAL_MATRIX) { 3437 PetscBool sorted; 3438 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3439 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3440 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3441 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3442 3443 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3444 if (sorted) { 3445 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3446 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3447 PetscFunctionReturn(0); 3448 } 3449 } else { /* call == MAT_REUSE_MATRIX */ 3450 IS iscol_sub; 3451 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3452 if (iscol_sub) { 3453 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3454 PetscFunctionReturn(0); 3455 } 3456 } 3457 } 3458 } 3459 3460 /* General case: iscol -> iscol_local which has global size of iscol */ 3461 if (call == MAT_REUSE_MATRIX) { 3462 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3463 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3464 } else { 3465 if (!iscol_local) { 3466 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3467 } 3468 } 3469 3470 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3471 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3472 3473 if (call == MAT_INITIAL_MATRIX) { 3474 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3475 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3476 } 3477 PetscFunctionReturn(0); 3478 } 3479 3480 /*@C 3481 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3482 and "off-diagonal" part of the matrix in CSR format. 3483 3484 Collective on MPI_Comm 3485 3486 Input Parameters: 3487 + comm - MPI communicator 3488 . 
A - "diagonal" portion of matrix 3489 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3490 - garray - global index of B columns 3491 3492 Output Parameter: 3493 . mat - the matrix, with input A as its local diagonal matrix 3494 Level: advanced 3495 3496 Notes: 3497 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3498 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3499 3500 .seealso: MatCreateMPIAIJWithSplitArrays() 3501 @*/ 3502 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3503 { 3504 PetscErrorCode ierr; 3505 Mat_MPIAIJ *maij; 3506 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3507 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3508 PetscScalar *oa=b->a; 3509 Mat Bnew; 3510 PetscInt m,n,N; 3511 3512 PetscFunctionBegin; 3513 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3514 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3515 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3516 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3517 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3518 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3519 3520 /* Get global columns of mat */ 3521 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3522 3523 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3524 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3525 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3526 maij = (Mat_MPIAIJ*)(*mat)->data; 3527 3528 (*mat)->preallocated = PETSC_TRUE; 3529 3530 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3531 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3532 3533 /* Set A as diagonal portion of *mat */ 3534 maij->A = A; 3535 3536 nz = oi[m]; 3537 for (i=0; i<nz; i++) { 3538 col = oj[i]; 3539 oj[i] = garray[col]; 3540 } 3541 3542 /* Set Bnew as off-diagonal portion of *mat */ 3543 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3544 bnew = (Mat_SeqAIJ*)Bnew->data; 3545 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3546 maij->B = Bnew; 3547 3548 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3549 3550 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3551 b->free_a = PETSC_FALSE; 3552 b->free_ij = PETSC_FALSE; 3553 ierr = MatDestroy(&B);CHKERRQ(ierr); 3554 3555 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3556 bnew->free_a = PETSC_TRUE; 3557 bnew->free_ij = PETSC_TRUE; 3558 3559 /* condense columns of maij->B */ 3560 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3561 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3562 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3563 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3564 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3565 PetscFunctionReturn(0); 3566 } 3567 3568 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3569 
3570 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3571 { 3572 PetscErrorCode ierr; 3573 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3574 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3575 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3576 Mat M,Msub,B=a->B; 3577 MatScalar *aa; 3578 Mat_SeqAIJ *aij; 3579 PetscInt *garray = a->garray,*colsub,Ncols; 3580 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3581 IS iscol_sub,iscmap; 3582 const PetscInt *is_idx,*cmap; 3583 PetscBool allcolumns=PETSC_FALSE; 3584 MPI_Comm comm; 3585 3586 PetscFunctionBegin; 3587 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3588 3589 if (call == MAT_REUSE_MATRIX) { 3590 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3591 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3592 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3593 3594 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3595 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3596 3597 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3598 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3599 3600 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3601 3602 } else { /* call == MAT_INITIAL_MATRIX) */ 3603 PetscBool flg; 3604 3605 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3606 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3607 3608 /* (1) iscol -> nonscalable iscol_local */ 3609 /* Check for special case: each processor gets entire matrix columns */ 3610 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3611 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3612 if (allcolumns) { 3613 iscol_sub = iscol_local; 3614 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3615 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3616 3617 } else { 3618 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3619 PetscInt *idx,*cmap1,k; 3620 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3621 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3622 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3623 count = 0; 3624 k = 0; 3625 for (i=0; i<Ncols; i++) { 3626 j = is_idx[i]; 3627 if (j >= cstart && j < cend) { 3628 /* diagonal part of mat */ 3629 idx[count] = j; 3630 cmap1[count++] = i; /* column index in submat */ 3631 } else if (Bn) { 3632 /* off-diagonal part of mat */ 3633 if (j == garray[k]) { 3634 idx[count] = j; 3635 cmap1[count++] = i; /* column index in submat */ 3636 } else if (j > garray[k]) { 3637 while (j > garray[k] && k < Bn-1) k++; 3638 if (j == garray[k]) { 3639 idx[count] = j; 3640 cmap1[count++] = i; /* column index in submat */ 3641 } 3642 } 3643 } 3644 } 3645 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3646 3647 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3648 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3649 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3650 3651 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3652 } 3653 3654 /* (3) Create sequential Msub */ 3655 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3656 } 3657 3658 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3659 aij = (Mat_SeqAIJ*)(Msub)->data; 3660 ii = aij->i; 3661 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3662 3663 /* 3664 m - number of local rows 3665 Ncols - number of columns (same on all processors) 3666 rstart - first row in new global matrix generated 3667 */ 3668 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3669 3670 if (call == MAT_INITIAL_MATRIX) { 3671 /* (4) Create parallel newmat */ 3672 PetscMPIInt rank,size; 3673 PetscInt csize; 3674 3675 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3676 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3677 3678 /* 3679 Determine the number of non-zeros in the diagonal and off-diagonal 3680 portions of the matrix in order to do correct preallocation 3681 */ 3682 3683 /* first get start and end of "diagonal" columns */ 3684 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3685 if (csize == PETSC_DECIDE) { 3686 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3687 if (mglobal == Ncols) { /* square matrix */ 3688 nlocal = m; 3689 } else { 3690 nlocal = Ncols/size + ((Ncols % size) > rank); 3691 } 3692 } else { 3693 nlocal = csize; 3694 } 3695 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3696 rstart = rend - nlocal; 3697 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3698 3699 /* next, compute all the lengths */ 3700 jj = aij->j; 3701 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3702 olens = dlens + m; 3703 for (i=0; i<m; i++) { 3704 jend = ii[i+1] - ii[i]; 3705 olen = 0; 3706 dlen = 0; 3707 for (j=0; j<jend; j++) { 3708 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3709 else dlen++; 3710 jj++; 3711 } 3712 olens[i] = olen; 3713 dlens[i] = dlen; 3714 } 3715 3716 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3717 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3718 3719 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3720 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
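/* inherit block sizes from isrow/iscol and preallocate with the exact dlens/olens counted above */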
3721 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3722 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3723 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3724 ierr = PetscFree(dlens);CHKERRQ(ierr); 3725 3726 } else { /* call == MAT_REUSE_MATRIX */ 3727 M = *newmat; 3728 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3729 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3730 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3731 /* 3732 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3733 rather than the slower MatSetValues(). 3734 */ 3735 M->was_assembled = PETSC_TRUE; 3736 M->assembled = PETSC_FALSE; 3737 } 3738 3739 /* (5) Set values of Msub to *newmat */ 3740 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3741 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3742 3743 jj = aij->j; 3744 aa = aij->a; 3745 for (i=0; i<m; i++) { 3746 row = rstart + i; 3747 nz = ii[i+1] - ii[i]; 3748 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3749 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3750 jj += nz; aa += nz; 3751 } 3752 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3753 3754 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3755 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3756 3757 ierr = PetscFree(colsub);CHKERRQ(ierr); 3758 3759 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3760 if (call == MAT_INITIAL_MATRIX) { 3761 *newmat = M; 3762 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3763 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3764 3765 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3766 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3767 3768 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3769 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3770 3771 if (iscol_local) { 3772 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3773 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3774 } 3775 } 3776 PetscFunctionReturn(0); 3777 } 3778 3779 /* 3780 Not great since it makes two copies of the submatrix, first an SeqAIJ 3781 in local and then by concatenating the local matrices the end result. 3782 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3783 3784 Note: This requires a sequential iscol with all indices. 
3785 */ 3786 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3787 { 3788 PetscErrorCode ierr; 3789 PetscMPIInt rank,size; 3790 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3791 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3792 Mat M,Mreuse; 3793 MatScalar *aa,*vwork; 3794 MPI_Comm comm; 3795 Mat_SeqAIJ *aij; 3796 PetscBool colflag,allcolumns=PETSC_FALSE; 3797 3798 PetscFunctionBegin; 3799 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3800 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3801 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3802 3803 /* Check for special case: each processor gets entire matrix columns */ 3804 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3805 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3806 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3807 3808 if (call == MAT_REUSE_MATRIX) { 3809 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3810 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3811 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3812 } else { 3813 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3814 } 3815 3816 /* 3817 m - number of local rows 3818 n - number of columns (same on all processors) 3819 rstart - first row in new global matrix generated 3820 */ 3821 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3822 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3823 if (call == MAT_INITIAL_MATRIX) { 3824 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3825 ii = aij->i; 3826 jj = aij->j; 3827 3828 /* 3829 Determine the number of non-zeros in the diagonal and off-diagonal 3830 portions of the matrix in order to do correct preallocation 3831 */ 3832 3833 /* first get start and end of "diagonal" columns */ 3834 if (csize == PETSC_DECIDE) { 3835 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3836 if (mglobal == n) { /* square matrix */ 3837 nlocal = m; 3838 } else { 3839 nlocal = n/size + ((n % size) > rank); 3840 } 3841 } else { 3842 nlocal = csize; 3843 } 3844 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3845 rstart = rend - nlocal; 3846 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3847 3848 /* next, compute all the lengths */ 3849 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3850 olens = dlens + m; 3851 for (i=0; i<m; i++) { 3852 jend = ii[i+1] - ii[i]; 3853 olen = 0; 3854 dlen = 0; 3855 for (j=0; j<jend; j++) { 3856 if (*jj < rstart || *jj >= rend) olen++; 3857 else dlen++; 3858 jj++; 3859 } 3860 olens[i] = olen; 3861 dlens[i] = dlen; 3862 } 3863 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3864 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3865 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3866 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3867 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3868 ierr = PetscFree(dlens);CHKERRQ(ierr); 3869 } else { 3870 PetscInt ml,nl; 3871 3872 M = *newmat; 3873 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3874 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3875 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3876 /* 3877 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3878 rather than the slower MatSetValues(). 3879 */ 3880 M->was_assembled = PETSC_TRUE; 3881 M->assembled = PETSC_FALSE; 3882 } 3883 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3884 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3885 ii = aij->i; 3886 jj = aij->j; 3887 aa = aij->a; 3888 for (i=0; i<m; i++) { 3889 row = rstart + i; 3890 nz = ii[i+1] - ii[i]; 3891 cwork = jj; jj += nz; 3892 vwork = aa; aa += nz; 3893 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3894 } 3895 3896 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3897 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3898 *newmat = M; 3899 3900 /* save submatrix used in processor for next request */ 3901 if (call == MAT_INITIAL_MATRIX) { 3902 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3903 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3904 } 3905 PetscFunctionReturn(0); 3906 } 3907 3908 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3909 { 3910 PetscInt m,cstart, cend,j,nnz,i,d; 3911 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3912 const PetscInt *JJ; 3913 PetscScalar *values; 3914 PetscErrorCode ierr; 3915 PetscBool nooffprocentries; 3916 3917 PetscFunctionBegin; 3918 if (Ii && Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3919 3920 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3921 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3922 m = B->rmap->n; 3923 cstart = B->cmap->rstart; 3924 cend = B->cmap->rend; 3925 rstart = B->rmap->rstart; 3926 3927 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3928 3929 #if defined(PETSC_USE_DEBUG) 3930 for (i=0; i<m && Ii; i++) { 3931 nnz = Ii[i+1]- Ii[i]; 3932 JJ = J + Ii[i]; 3933 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3934 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3935 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3936 } 3937 #endif 3938 3939 for (i=0; i<m && Ii; i++) { 3940 nnz = Ii[i+1]- Ii[i]; 3941 JJ = J + Ii[i]; 3942 nnz_max = PetscMax(nnz_max,nnz); 3943 d = 0; 3944 for (j=0; j<nnz; j++) { 3945 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3946 } 3947 d_nnz[i] = d; 3948 o_nnz[i] = nnz - d; 3949 } 3950 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3951 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3952 3953 if (v) values = (PetscScalar*)v; 3954 else { 3955 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3956 } 3957 3958 for (i=0; i<m && Ii; i++) { 3959 ii = i + rstart; 3960 nnz = Ii[i+1]- Ii[i]; 3961 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? 
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3962 } 3963 nooffprocentries = B->nooffprocentries; 3964 B->nooffprocentries = PETSC_TRUE; 3965 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3966 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3967 B->nooffprocentries = nooffprocentries; 3968 3969 if (!v) { 3970 ierr = PetscFree(values);CHKERRQ(ierr); 3971 } 3972 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3973 PetscFunctionReturn(0); 3974 } 3975 3976 /*@ 3977 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3978 (the default parallel PETSc format). 3979 3980 Collective on MPI_Comm 3981 3982 Input Parameters: 3983 + B - the matrix 3984 . i - the indices into j for the start of each local row (starts with zero) 3985 . j - the column indices for each local row (starts with zero) 3986 - v - optional values in the matrix 3987 3988 Level: developer 3989 3990 Notes: 3991 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3992 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3993 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3994 3995 The i and j indices are 0 based, and the i entries are offsets into the local j array. 3996 3997 The format used for the sparse matrix input is equivalent to a 3998 row-major ordering; i.e., for the following matrix, the expected input data is 3999 as shown 4000 4001 $ 1 0 0 4002 $ 2 0 3 P0 4003 $ ------- 4004 $ 4 5 6 P1 4005 $ 4006 $ Process0 [P0]: rows_owned=[0,1] 4007 $ i = {0,1,3} [size = nrow+1 = 2+1] 4008 $ j = {0,0,2} [size = 3] 4009 $ v = {1,2,3} [size = 3] 4010 $ 4011 $ Process1 [P1]: rows_owned=[2] 4012 $ i = {0,3} [size = nrow+1 = 1+1] 4013 $ j = {0,1,2} [size = 3] 4014 $ v = {4,5,6} [size = 3] 4015 4016 .keywords: matrix, aij, compressed row, sparse, parallel 4017 4018 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4019 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4020 @*/ 4021 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4022 { 4023 PetscErrorCode ierr; 4024 4025 PetscFunctionBegin; 4026 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4027 PetscFunctionReturn(0); 4028 } 4029 4030 /*@C 4031 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4032 (the default parallel PETSc format). For good matrix assembly performance 4033 the user should preallocate the matrix storage by setting the parameters 4034 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4035 performance can be increased by more than a factor of 50. 4036 4037 Collective on MPI_Comm 4038 4039 Input Parameters: 4040 + B - the matrix 4041 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4042 (same value is used for all local rows) 4043 . d_nnz - array containing the number of nonzeros in the various rows of the 4044 DIAGONAL portion of the local submatrix (possibly different for each row) 4045 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4046 The size of this array is equal to the number of local rows, i.e. 'm'.
4047 For matrices that will be factored, you must leave room for (and set) 4048 the diagonal entry even if it is zero. 4049 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4050 submatrix (same value is used for all local rows). 4051 - o_nnz - array containing the number of nonzeros in the various rows of the 4052 OFF-DIAGONAL portion of the local submatrix (possibly different for 4053 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4054 structure. The size of this array is equal to the number 4055 of local rows, i.e. 'm'. 4056 4057 If the *_nnz parameter is given then the *_nz parameter is ignored. 4058 4059 The AIJ format (also called the Yale sparse matrix format or 4060 compressed row storage (CSR)) is fully compatible with standard Fortran 77 4061 storage. The stored row and column indices begin with zero. 4062 See Users-Manual: ch_mat for details. 4063 4064 The parallel matrix is partitioned such that the first m0 rows belong to 4065 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4066 to process 2, etc., where m0,m1,m2,... are the input parameter 'm'. 4067 4068 The DIAGONAL portion of the local submatrix of a processor can be defined 4069 as the submatrix obtained by extracting the part corresponding to 4070 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4071 first row that belongs to the processor, r2 is the last row belonging to 4072 this processor, and c1-c2 is the range of indices of the local part of a 4073 vector suitable for applying the matrix to. This is an m x n matrix. In the 4074 common case of a square matrix, the row and column ranges are the same and 4075 the DIAGONAL part is also square. The remaining portion of the local 4076 submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion. 4077 4078 If o_nnz, d_nnz are specified, then o_nz and d_nz are ignored. 4079 4080 You can call MatGetInfo() to get information on how effective the preallocation was; 4081 for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4082 You can also run with the option -info and look for messages with the string 4083 malloc in them to see if additional memory allocation was needed. 4084 4085 Example usage: 4086 4087 Consider the following 8x8 matrix with 34 non-zero values, that is 4088 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4089 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4090 as follows: 4091 4092 .vb 4093 1 2 0 | 0 3 0 | 0 4 4094 Proc0 0 5 6 | 7 0 0 | 8 0 4095 9 0 10 | 11 0 0 | 12 0 4096 ------------------------------------- 4097 13 0 14 | 15 16 17 | 0 0 4098 Proc1 0 18 0 | 19 20 21 | 0 0 4099 0 0 0 | 22 23 0 | 24 0 4100 ------------------------------------- 4101 Proc2 25 26 27 | 0 0 28 | 29 0 4102 30 0 0 | 31 32 33 | 0 34 4103 .ve 4104 4105 This can be represented as a collection of submatrices as: 4106 4107 .vb 4108 A B C 4109 D E F 4110 G H I 4111 .ve 4112 4113 where the submatrices A,B,C are owned by proc0, D,E,F are 4114 owned by proc1, and G,H,I are owned by proc2. 4115 4116 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4117 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4118 The 'M','N' parameters are 8,8, and have the same values on all procs. 4119 4120 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4121 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4122 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4123 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4124 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4125 matrix and [DF] as another SeqAIJ matrix. 4126 4127 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4128 allocated for every row of the local diagonal submatrix, and o_nz 4129 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4130 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4131 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4132 In this case, the values of d_nz,o_nz are: 4133 .vb 4134 proc0 : d_nz = 2, o_nz = 2 4135 proc1 : d_nz = 3, o_nz = 2 4136 proc2 : d_nz = 1, o_nz = 4 4137 .ve 4138 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4139 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4140 for proc2; i.e., we are using 12+15+10=37 storage locations to store 4141 34 values. 4142 4143 When the d_nnz, o_nnz parameters are specified, the storage is specified 4144 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4145 In the above case the values for d_nnz,o_nnz are: 4146 .vb 4147 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4148 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4149 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4150 .ve 4151 Here the space allocated is the sum of all of the above values, i.e. 34, and 4152 hence the preallocation is perfect. 4153 4154 Level: intermediate 4155 4156 .keywords: matrix, aij, compressed row, sparse, parallel 4157 4158 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 4159 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 4160 @*/ 4161 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 4162 { 4163 PetscErrorCode ierr; 4164 4165 PetscFunctionBegin; 4166 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 4167 PetscValidType(B,1); 4168 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 4169 PetscFunctionReturn(0); 4170 } 4171 4172 /*@ 4173 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 4174 CSR format. 4175 4176 Collective on MPI_Comm 4177 4178 Input Parameters: 4179 + comm - MPI communicator 4180 . m - number of local rows (cannot be PETSC_DECIDE) 4181 . n - this value should be the same as the local size used in creating the 4182 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 4183 calculated if N is given). For square matrices n is almost always m. 4184 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4185 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4186 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4187 . j - column indices 4188 - a - matrix values 4189 4190 Output Parameter: 4191 . mat - the matrix 4192 4193 Level: intermediate 4194 4195 Notes: 4196 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4197 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4198 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4199 4200 The i and j indices are 0 based, and the i entries are offsets into the local j array.
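   A minimal calling sketch (hypothetical variable names; assumes the CSR arrays i, j, and a
   for this process's rows have already been built, e.g. with the layout shown below):

$      Mat A;
$      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);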
4201 4202 The format used for the sparse matrix input is equivalent to a 4203 row-major ordering; i.e., for the following matrix, the expected input data is 4204 as shown 4205 4206 $ 1 0 0 4207 $ 2 0 3 P0 4208 $ ------- 4209 $ 4 5 6 P1 4210 $ 4211 $ Process0 [P0]: rows_owned=[0,1] 4212 $ i = {0,1,3} [size = nrow+1 = 2+1] 4213 $ j = {0,0,2} [size = 3] 4214 $ v = {1,2,3} [size = 3] 4215 $ 4216 $ Process1 [P1]: rows_owned=[2] 4217 $ i = {0,3} [size = nrow+1 = 1+1] 4218 $ j = {0,1,2} [size = 3] 4219 $ v = {4,5,6} [size = 3] 4220 4221 .keywords: matrix, aij, compressed row, sparse, parallel 4222 4223 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4224 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4225 @*/ 4226 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4227 { 4228 PetscErrorCode ierr; 4229 4230 PetscFunctionBegin; 4231 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4232 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4233 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4234 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4235 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4236 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4237 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4238 PetscFunctionReturn(0); 4239 } 4240 4241 /*@C 4242 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4243 (the default parallel PETSc format). For good matrix assembly performance 4244 the user should preallocate the matrix storage by setting the parameters 4245 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4246 performance can be increased by more than a factor of 50. 4247 4248 Collective on MPI_Comm 4249 4250 Input Parameters: 4251 + comm - MPI communicator 4252 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 4253 This value should be the same as the local size used in creating the 4254 y vector for the matrix-vector product y = Ax. 4255 . n - this value should be the same as the local size used in creating the 4256 x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it 4257 calculated if N is given). For square matrices n is almost always m. 4258 . M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given) 4259 . N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given) 4260 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4261 (same value is used for all local rows) 4262 . d_nnz - array containing the number of nonzeros in the various rows of the 4263 DIAGONAL portion of the local submatrix (possibly different for each row) 4264 or NULL, if d_nz is used to specify the nonzero structure. 4265 The size of this array is equal to the number of local rows, i.e. 'm'. 4266 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4267 submatrix (same value is used for all local rows). 4268 - o_nnz - array containing the number of nonzeros in the various rows of the 4269 OFF-DIAGONAL portion of the local submatrix (possibly different for 4270 each row) or NULL, if o_nz is used to specify the nonzero 4271 structure.
The size of this array is equal to the number 4272 of local rows, i.e. 'm'. 4273 4274 Output Parameter: 4275 . A - the matrix 4276 4277 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4278 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4279 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()] 4280 4281 Notes: 4282 If the *_nnz parameter is given then the *_nz parameter is ignored. 4283 4284 The m,n,M,N parameters specify the size of the matrix, and its partitioning across 4285 processors, while the d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4286 storage requirements for this matrix. 4287 4288 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4289 processor then it must be used on all processors that share the object for 4290 that argument. 4291 4292 The user MUST specify either the local or global matrix dimensions 4293 (possibly both). 4294 4295 The parallel matrix is partitioned across processors such that the 4296 first m0 rows belong to process 0, the next m1 rows belong to 4297 process 1, the next m2 rows belong to process 2, etc., where 4298 m0,m1,m2,... are the input parameter 'm'; i.e., each processor stores 4299 values corresponding to an [m x N] submatrix. 4300 4301 The columns are logically partitioned with the n0 columns belonging 4302 to the 0th partition, the next n1 columns belonging to the next 4303 partition, etc., where n0,n1,n2,... are the input parameter 'n'. 4304 4305 The DIAGONAL portion of the local submatrix on any given processor 4306 is the submatrix corresponding to the rows and columns m,n 4307 owned by the given processor; i.e., the diagonal matrix on 4308 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4309 etc. The remaining portion of the local submatrix [m x (N-n)] 4310 constitutes the OFF-DIAGONAL portion. The example below better 4311 illustrates this concept. 4312 4313 For a square global matrix we define each processor's diagonal portion 4314 to be its local rows and the corresponding columns (a square submatrix); 4315 each processor's off-diagonal portion encompasses the remainder of the 4316 local matrix (a rectangular submatrix). 4317 4318 If d_nnz, o_nnz are specified, then d_nz and o_nz are ignored. 4319 4320 When calling this routine with a single process communicator, a matrix of 4321 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4322 type of communicator, use the construction mechanism 4323 .vb 4324 MatCreate(...,&A); 4325 MatSetType(A,MATMPIAIJ); 4326 MatSetSizes(A,m,n,M,N); 4327 MatMPIAIJSetPreallocation(A,...); 4328 .ve 4329 4330 4331 4332 By default, this format uses inodes (identical nodes) when possible. 4333 We search for consecutive rows with the same nonzero structure, thereby 4334 reusing matrix information to achieve increased efficiency. 4335 4336 Options Database Keys: 4337 + -mat_no_inode - Do not use inodes 4338 - -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4339 4340 4341 4342 Example usage: 4343 4344 Consider the following 8x8 matrix with 34 non-zero values, that is 4345 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4346 proc1 owns 3 rows, and proc2 owns 2 rows.
This division can be shown 4347 as follows: 4348 4349 .vb 4350 1 2 0 | 0 3 0 | 0 4 4351 Proc0 0 5 6 | 7 0 0 | 8 0 4352 9 0 10 | 11 0 0 | 12 0 4353 ------------------------------------- 4354 13 0 14 | 15 16 17 | 0 0 4355 Proc1 0 18 0 | 19 20 21 | 0 0 4356 0 0 0 | 22 23 0 | 24 0 4357 ------------------------------------- 4358 Proc2 25 26 27 | 0 0 28 | 29 0 4359 30 0 0 | 31 32 33 | 0 34 4360 .ve 4361 4362 This can be represented as a collection of submatrices as 4363 4364 .vb 4365 A B C 4366 D E F 4367 G H I 4368 .ve 4369 4370 where the submatrices A,B,C are owned by proc0, D,E,F are 4371 owned by proc1, and G,H,I are owned by proc2. 4372 4373 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4374 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4375 The 'M','N' parameters are 8,8, and have the same values on all procs. 4376 4377 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4378 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4379 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4380 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4381 part as SeqAIJ matrices. For example, proc1 will store [E] as one SeqAIJ 4382 matrix and [DF] as another SeqAIJ matrix. 4383 4384 When the d_nz, o_nz parameters are specified, d_nz storage elements are 4385 allocated for every row of the local diagonal submatrix, and o_nz 4386 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4387 One way to choose d_nz and o_nz is to use the maximum number of nonzeros over the local 4388 rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4389 In this case, the values of d_nz,o_nz are 4390 .vb 4391 proc0 : d_nz = 2, o_nz = 2 4392 proc1 : d_nz = 3, o_nz = 2 4393 proc2 : d_nz = 1, o_nz = 4 4394 .ve 4395 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4396 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4397 for proc2; i.e., we are using 12+15+10=37 storage locations to store 4398 34 values. 4399 4400 When the d_nnz, o_nnz parameters are specified, the storage is specified 4401 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 4402 In the above case the values for d_nnz,o_nnz are 4403 .vb 4404 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4405 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4406 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4407 .ve 4408 Here the space allocated is the sum of all of the above values, i.e. 34, and 4409 hence the preallocation is perfect.
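   With these d_nnz/o_nnz values, proc1, for instance, could create its share of the matrix as
   follows (a sketch; the variable names and communicator are illustrative only):

$      Mat      A;
$      PetscInt d_nnz[] = {3,3,2},o_nnz[] = {2,1,1};
$      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);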
4410 4411 Level: intermediate 4412 4413 .keywords: matrix, aij, compressed row, sparse, parallel 4414 4415 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4416 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4417 @*/ 4418 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4419 { 4420 PetscErrorCode ierr; 4421 PetscMPIInt size; 4422 4423 PetscFunctionBegin; 4424 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4425 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4427 if (size > 1) { 4428 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4429 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4430 } else { 4431 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4432 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4433 } 4434 PetscFunctionReturn(0); 4435 } 4436 4437 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4438 { 4439 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4440 PetscBool flg; 4441 PetscErrorCode ierr; 4442 4443 PetscFunctionBegin; 4444 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4445 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4446 if (Ad) *Ad = a->A; 4447 if (Ao) *Ao = a->B; 4448 if (colmap) *colmap = a->garray; 4449 PetscFunctionReturn(0); 4450 } 4451 4452 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4453 { 4454 PetscErrorCode ierr; 4455 PetscInt m,N,i,rstart,nnz,Ii; 4456 PetscInt *indx; 4457 PetscScalar *values; 4458 4459 PetscFunctionBegin; 4460 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4461 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4462 PetscInt *dnz,*onz,sum,bs,cbs; 4463 4464 if (n == PETSC_DECIDE) { 4465 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4466 } 4467 /* Check sum(n) = N */ 4468 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4469 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4470 4471 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4472 rstart -= m; 4473 4474 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4475 for (i=0; i<m; i++) { 4476 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4477 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4478 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4479 } 4480 4481 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4482 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4483 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4484 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4485 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4486 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4487 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4488 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4489 } 4490 4491 /* numeric phase */ 4492 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4493 for (i=0; i<m; i++) { 4494 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4495 Ii = i + rstart; 4496 ierr = 
MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4497 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4498 } 4499 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4500 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4501 PetscFunctionReturn(0); 4502 } 4503 4504 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4505 { 4506 PetscErrorCode ierr; 4507 PetscMPIInt rank; 4508 PetscInt m,N,i,rstart,nnz; 4509 size_t len; 4510 const PetscInt *indx; 4511 PetscViewer out; 4512 char *name; 4513 Mat B; 4514 const PetscScalar *values; 4515 4516 PetscFunctionBegin; 4517 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4518 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4519 /* Should this be the type of the diagonal block of A? */ 4520 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4521 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4522 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4523 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4524 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4525 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4526 for (i=0; i<m; i++) { 4527 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4528 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4529 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4530 } 4531 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4532 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4533 4534 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4535 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4536 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4537 sprintf(name,"%s.%d",outfile,rank); 4538 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4539 ierr = PetscFree(name);CHKERRQ(ierr); 4540 ierr = MatView(B,out);CHKERRQ(ierr); 4541 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4542 ierr = MatDestroy(&B);CHKERRQ(ierr); 4543 PetscFunctionReturn(0); 4544 } 4545 4546 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4547 { 4548 PetscErrorCode ierr; 4549 Mat_Merge_SeqsToMPI *merge; 4550 PetscContainer container; 4551 4552 PetscFunctionBegin; 4553 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4554 if (container) { 4555 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4556 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4557 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4558 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4559 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4560 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4561 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4562 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4563 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4564 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4565 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4566 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4567 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4568 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4569 ierr = PetscFree(merge);CHKERRQ(ierr); 4570 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4571 } 4572 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4573 PetscFunctionReturn(0); 4574 } 4575 4576 #include <../src/mat/utils/freespace.h> 4577 #include <petscbt.h> 4578 4579 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4580 { 4581 PetscErrorCode ierr; 4582 MPI_Comm 
comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ*)seqmat->data;
  PetscMPIInt         size,rank,taga,*len_s;
  PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
  PetscInt            proc,m;
  PetscInt            **buf_ri,**buf_rj;
  PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
  PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
  MPI_Request         *s_waits,*r_waits;
  MPI_Status          *status;
  MatScalar           *aa=a->a;
  MatScalar           **abuf_r,*ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer      container;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);

  ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
  ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  /*-----------------------------*/
  ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
  ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);

  ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
  for (proc=0,k=0; proc<size; proc++) {
    if (!len_s[proc]) continue;
    i    = owners[proc];
    ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr);
    k++;
  }

  if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);}
  if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);}
  ierr = PetscFree(status);CHKERRQ(ierr);

  ierr = PetscFree(s_waits);CHKERRQ(ierr);
  ierr = PetscFree(r_waits);CHKERRQ(ierr);

  /* insert mat values of mpimat */
  /*----------------------------*/
  ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *(buf_ri_k[k]);
    nextrow[k]  = buf_ri_k[k]+1;             /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i=0; i<m; i++) {
    arow = owners[rank] + i;
    bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
    bnzi = bi[i+1] - bi[i];
    ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr);

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow+1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a->a + ai[arow];
    nextaj = 0;
    for (j=0; nextaj<anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k]+1) - *nextai[k];
        aj     = buf_rj[k] + *(nextai[k]);
        aa     = abuf_r[k] + *(nextai[k]);
        nextaj = 0;
        for
(j=0; nextaj<anzi; j++) { 4676 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4677 ba_i[j] += aa[nextaj++]; 4678 } 4679 } 4680 nextrow[k]++; nextai[k]++; 4681 } 4682 } 4683 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4684 } 4685 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4686 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4687 4688 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4689 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4690 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4691 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4692 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4693 PetscFunctionReturn(0); 4694 } 4695 4696 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4697 { 4698 PetscErrorCode ierr; 4699 Mat B_mpi; 4700 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4701 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4702 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4703 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4704 PetscInt len,proc,*dnz,*onz,bs,cbs; 4705 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4706 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4707 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4708 MPI_Status *status; 4709 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4710 PetscBT lnkbt; 4711 Mat_Merge_SeqsToMPI *merge; 4712 PetscContainer container; 4713 4714 PetscFunctionBegin; 4715 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4716 4717 /* make sure it is a PETSc comm */ 4718 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4719 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4720 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4721 4722 ierr = PetscNew(&merge);CHKERRQ(ierr); 4723 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4724 4725 /* determine row ownership */ 4726 /*---------------------------------------------------------*/ 4727 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4728 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4729 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4730 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4731 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4732 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4733 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4734 4735 m = merge->rowmap->n; 4736 owners = merge->rowmap->range; 4737 4738 /* determine the number of messages to send, their lengths */ 4739 /*---------------------------------------------------------*/ 4740 len_s = merge->len_s; 4741 4742 len = 0; /* length of buf_si[] */ 4743 merge->nsend = 0; 4744 for (proc=0; proc<size; proc++) { 4745 len_si[proc] = 0; 4746 if (proc == rank) { 4747 len_s[proc] = 0; 4748 } else { 4749 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4750 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4751 } 4752 if (len_s[proc]) { 4753 merge->nsend++; 4754 nrows = 0; 4755 for (i=owners[proc]; i<owners[proc+1]; i++) { 4756 if (ai[i+1] > ai[i]) nrows++; 4757 } 4758 len_si[proc] = 2*(nrows+1); 4759 len += len_si[proc]; 4760 } 4761 } 4762 4763 /* determine the number and length of messages to receive for ij-structure */ 4764 /*-------------------------------------------------------------------------*/ 4765 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 
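  /* len_s[] gives, per process, the length of the j-structure (column index) message, while len_si[]
     gives the length of the corresponding compressed i-structure message, 2*(nrows+1) entries; both
     sets of lengths are exchanged below so that matching receives can be posted */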
4766 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4767 4768 /* post the Irecv of j-structure */ 4769 /*-------------------------------*/ 4770 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4771 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4772 4773 /* post the Isend of j-structure */ 4774 /*--------------------------------*/ 4775 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4776 4777 for (proc=0, k=0; proc<size; proc++) { 4778 if (!len_s[proc]) continue; 4779 i = owners[proc]; 4780 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4781 k++; 4782 } 4783 4784 /* receives and sends of j-structure are complete */ 4785 /*------------------------------------------------*/ 4786 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4787 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4788 4789 /* send and recv i-structure */ 4790 /*---------------------------*/ 4791 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4792 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4793 4794 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4795 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4796 for (proc=0,k=0; proc<size; proc++) { 4797 if (!len_s[proc]) continue; 4798 /* form outgoing message for i-structure: 4799 buf_si[0]: nrows to be sent 4800 [1:nrows]: row index (global) 4801 [nrows+1:2*nrows+1]: i-structure index 4802 */ 4803 /*-------------------------------------------*/ 4804 nrows = len_si[proc]/2 - 1; 4805 buf_si_i = buf_si + nrows+1; 4806 buf_si[0] = nrows; 4807 buf_si_i[0] = 0; 4808 nrows = 0; 4809 for (i=owners[proc]; i<owners[proc+1]; i++) { 4810 anzi = ai[i+1] - ai[i]; 4811 if (anzi) { 4812 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4813 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4814 nrows++; 4815 } 4816 } 4817 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4818 k++; 4819 buf_si += len_si[proc]; 4820 } 4821 4822 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4823 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4824 4825 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4826 for (i=0; i<merge->nrecv; i++) { 4827 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4828 } 4829 4830 ierr = PetscFree(len_si);CHKERRQ(ierr); 4831 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4832 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4833 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4834 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4835 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4836 ierr = PetscFree(status);CHKERRQ(ierr); 4837 4838 /* compute a local seq matrix in each processor */ 4839 /*----------------------------------------------*/ 4840 /* allocate bi array and free space for accumulating nonzero column info */ 4841 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4842 bi[0] = 0; 4843 4844 /* create and initialize a linked list */ 4845 nlnk = N+1; 4846 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4847 4848 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4849 len = 
ai[owners[rank+1]] - ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k];                 /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of the k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* = max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

/* attach the supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
      MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
                 matrices from each processor

    Collective on MPI_Comm

   Input Parameters:
+    comm - the communicator the parallel matrix will live on
.    seqmat - the input sequential matrix, one per process
.    m - number of local rows (or PETSC_DECIDE)
.    n - number of local columns (or PETSC_DECIDE)
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.    mpimat - the parallel matrix generated

    Level: advanced

   Notes:
     The dimensions of the sequential matrix in each processor MUST be the same.
     The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
     destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
     MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
          mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
          with MatGetSize().

    Not Collective

   Input Parameters:
+    A - the matrix
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated 5009 5010 Level: developer 5011 5012 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5013 5014 @*/ 5015 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5016 { 5017 PetscErrorCode ierr; 5018 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5019 Mat_SeqAIJ *mat,*a,*b; 5020 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5021 MatScalar *aa,*ba,*cam; 5022 PetscScalar *ca; 5023 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5024 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5025 PetscBool match; 5026 MPI_Comm comm; 5027 PetscMPIInt size; 5028 5029 PetscFunctionBegin; 5030 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5031 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5032 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5033 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5034 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5035 5036 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5037 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5038 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5039 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5040 aa = a->a; ba = b->a; 5041 if (scall == MAT_INITIAL_MATRIX) { 5042 if (size == 1) { 5043 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5044 PetscFunctionReturn(0); 5045 } 5046 5047 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5048 ci[0] = 0; 5049 for (i=0; i<am; i++) { 5050 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5051 } 5052 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5053 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5054 k = 0; 5055 for (i=0; i<am; i++) { 5056 ncols_o = bi[i+1] - bi[i]; 5057 ncols_d = ai[i+1] - ai[i]; 5058 /* off-diagonal portion of A */ 5059 for (jo=0; jo<ncols_o; jo++) { 5060 col = cmap[*bj]; 5061 if (col >= cstart) break; 5062 cj[k] = col; bj++; 5063 ca[k++] = *ba++; 5064 } 5065 /* diagonal portion of A */ 5066 for (j=0; j<ncols_d; j++) { 5067 cj[k] = cstart + *aj++; 5068 ca[k++] = *aa++; 5069 } 5070 /* off-diagonal portion of A */ 5071 for (j=jo; j<ncols_o; j++) { 5072 cj[k] = cmap[*bj++]; 5073 ca[k++] = *ba++; 5074 } 5075 } 5076 /* put together the new matrix */ 5077 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5078 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5079 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5080 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5081 mat->free_a = PETSC_TRUE; 5082 mat->free_ij = PETSC_TRUE; 5083 mat->nonew = 0; 5084 } else if (scall == MAT_REUSE_MATRIX) { 5085 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5086 ci = mat->i; cj = mat->j; cam = mat->a; 5087 for (i=0; i<am; i++) { 5088 /* off-diagonal portion of A */ 5089 ncols_o = bi[i+1] - bi[i]; 5090 for (jo=0; jo<ncols_o; jo++) { 5091 col = cmap[*bj]; 5092 if (col >= cstart) break; 5093 *cam++ = *ba++; bj++; 5094 } 5095 /* diagonal portion of A */ 5096 ncols_d = ai[i+1] - ai[i]; 5097 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5098 /* off-diagonal portion of A */ 5099 for (j=jo; j<ncols_o; j++) { 5100 *cam++ = *ba++; bj++; 5101 } 5102 } 5103 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5104 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5105 PetscFunctionReturn(0); 5106 } 5107 5108 /*@C 5109 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5110 5111 Not Collective 5112 5113 Input Parameters: 5114 + A - the matrix 5115 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5116 - row, col - index sets of rows and columns to extract (or NULL) 5117 5118 Output Parameter: 5119 . A_loc - the local sequential matrix generated 5120 5121 Level: developer 5122 5123 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5124 5125 @*/ 5126 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5127 { 5128 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5129 PetscErrorCode ierr; 5130 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5131 IS isrowa,iscola; 5132 Mat *aloc; 5133 PetscBool match; 5134 5135 PetscFunctionBegin; 5136 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5137 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5138 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5139 if (!row) { 5140 start = A->rmap->rstart; end = A->rmap->rend; 5141 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5142 } else { 5143 isrowa = *row; 5144 } 5145 if (!col) { 5146 start = A->cmap->rstart; 5147 cmap = a->garray; 5148 nzA = a->A->cmap->n; 5149 nzB = a->B->cmap->n; 5150 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5151 ncols = 0; 5152 for (i=0; i<nzB; i++) { 5153 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5154 else break; 5155 } 5156 imark = i; 5157 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5158 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5159 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5160 } else { 5161 iscola = *col; 5162 } 5163 if (scall != MAT_INITIAL_MATRIX) { 5164 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5165 aloc[0] = *A_loc; 5166 } 5167 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5168 if (!col) { /* attach global id of condensed columns */ 5169 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5170 } 5171 *A_loc = aloc[0]; 5172 ierr = PetscFree(aloc);CHKERRQ(ierr); 5173 if (!row) { 5174 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5175 } 5176 if (!col) { 5177 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5178 } 5179 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5180 PetscFunctionReturn(0); 5181 } 5182 5183 /*@C 5184 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
.    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
-    rowb, colb - index sets of rows and columns of B to extract (or NULL)

   Output Parameter:
+    rowb, colb - index sets of rows and columns of B to extract
-    B_seq - the sequential matrix generated

    Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
{
  Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
  IS             isrowb,iscolb;
  Mat            *bseq=NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
  }
  ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
    ncols = 0;
    for (i=0; i<nzB; i++) {  /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i=0; i<nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
    ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
  } else {
    if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb  = *rowb; iscolb = *colb;
    ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
    bseq[0] = *B_seq;
  }
  ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
  *B_seq = bseq[0];
  ierr   = PetscFree(bseq);CHKERRQ(ierr);
  if (!rowb) {
    ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
  } else {
    *colb = iscolb;
  }
  ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in MPIAIJ format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable.
5271 5272 Level: developer 5273 5274 */ 5275 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5276 { 5277 PetscErrorCode ierr; 5278 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5279 Mat_SeqAIJ *b_oth; 5280 VecScatter ctx; 5281 MPI_Comm comm; 5282 const PetscMPIInt *rprocs,*sprocs; 5283 const PetscInt *srow,*rstarts,*sstarts; 5284 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5285 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5286 PetscScalar *b_otha,*bufa,*bufA,*vals; 5287 MPI_Request *rwaits = NULL,*swaits = NULL; 5288 MPI_Status rstatus; 5289 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5290 5291 PetscFunctionBegin; 5292 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5293 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5294 5295 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5296 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5297 } 5298 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5299 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5300 5301 if (size == 1) { 5302 startsj_s = NULL; 5303 bufa_ptr = NULL; 5304 *B_oth = NULL; 5305 PetscFunctionReturn(0); 5306 } 5307 5308 ctx = a->Mvctx; 5309 tag = ((PetscObject)ctx)->tag; 5310 5311 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5312 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5313 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5314 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5315 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5316 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5317 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5318 5319 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5320 if (scall == MAT_INITIAL_MATRIX) { 5321 /* i-array */ 5322 /*---------*/ 5323 /* post receives */ 5324 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5325 for (i=0; i<nrecvs; i++) { 5326 rowlen = rvalues + rstarts[i]*rbs; 5327 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5328 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5329 } 5330 5331 /* pack the outgoing message */ 5332 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5333 5334 sstartsj[0] = 0; 5335 rstartsj[0] = 0; 5336 len = 0; /* total length of j or a array to be sent */ 5337 if (nsends) { 5338 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5339 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5340 } 5341 for (i=0; i<nsends; i++) { 5342 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5343 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5344 for (j=0; j<nrows; j++) { 5345 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5346 for (l=0; l<sbs; l++) { 5347 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
        rowlen[j*sbs+l] = ncols;
        len += ncols;
        ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
      }
      k++;
    }
    ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

    sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
  }
  /* recvs and sends of i-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  /* allocate buffers for sending j and a arrays */
  ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
  ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

  /* create i-array of B_oth */
  ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

  b_othi[0] = 0;
  len       = 0; /* total length of j or a array to be received */
  k         = 0;
  for (i=0; i<nrecvs; i++) {
    rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
    nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
    for (j=0; j<nrows; j++) {
      b_othi[k+1] = b_othi[k] + rowlen[j];
      ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
      k++;
    }
    rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);

  /* allocate space for j and a arrays of B_oth */
  ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
  ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

  /* j-array */
  /*---------*/
  /* post receives of j-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message j-array */
  if (nsends) k = sstarts[0];
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufJ  = bufj+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0; ll<sbs; ll++) {
        ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        for (l=0; l<ncols; l++) {
          *bufJ++ = cols[l];
        }
        ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
      }
    }
    ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
  }

  /* recvs and sends of j-array are completed */
  i = nrecvs;
  while (i--) {
    ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr);
  }
  if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  =
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5440 } 5441 5442 /* pack the outgoing message a-array */ 5443 if (nsends) k = sstarts[0]; 5444 for (i=0; i<nsends; i++) { 5445 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5446 bufA = bufa+sstartsj[i]; 5447 for (j=0; j<nrows; j++) { 5448 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5449 for (ll=0; ll<sbs; ll++) { 5450 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5451 for (l=0; l<ncols; l++) { 5452 *bufA++ = vals[l]; 5453 } 5454 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5455 } 5456 } 5457 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5458 } 5459 /* recvs and sends of a-array are completed */ 5460 i = nrecvs; 5461 while (i--) { 5462 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5463 } 5464 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5465 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5466 5467 if (scall == MAT_INITIAL_MATRIX) { 5468 /* put together the new matrix */ 5469 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5470 5471 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5472 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5473 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5474 b_oth->free_a = PETSC_TRUE; 5475 b_oth->free_ij = PETSC_TRUE; 5476 b_oth->nonew = 0; 5477 5478 ierr = PetscFree(bufj);CHKERRQ(ierr); 5479 if (!startsj_s || !bufa_ptr) { 5480 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5481 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5482 } else { 5483 *startsj_s = sstartsj; 5484 *startsj_r = rstartsj; 5485 *bufa_ptr = bufa; 5486 } 5487 } 5488 5489 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5490 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5491 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5492 PetscFunctionReturn(0); 5493 } 5494 5495 /*@C 5496 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5497 5498 Not Collective 5499 5500 Input Parameters: 5501 . A - The matrix in mpiaij format 5502 5503 Output Parameter: 5504 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5505 . 
colmap - A map from global column index to local index into lvec 5506 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5507 5508 Level: developer 5509 5510 @*/ 5511 #if defined(PETSC_USE_CTABLE) 5512 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5513 #else 5514 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5515 #endif 5516 { 5517 Mat_MPIAIJ *a; 5518 5519 PetscFunctionBegin; 5520 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5521 PetscValidPointer(lvec, 2); 5522 PetscValidPointer(colmap, 3); 5523 PetscValidPointer(multScatter, 4); 5524 a = (Mat_MPIAIJ*) A->data; 5525 if (lvec) *lvec = a->lvec; 5526 if (colmap) *colmap = a->colmap; 5527 if (multScatter) *multScatter = a->Mvctx; 5528 PetscFunctionReturn(0); 5529 } 5530 5531 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5532 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5533 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5534 #if defined(PETSC_HAVE_MKL_SPARSE) 5535 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5536 #endif 5537 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5538 #if defined(PETSC_HAVE_ELEMENTAL) 5539 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5540 #endif 5541 #if defined(PETSC_HAVE_HYPRE) 5542 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5543 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5544 #endif 5545 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5546 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5547 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5548 5549 /* 5550 Computes (B'*A')' since computing B*A directly is untenable 5551 5552 n p p 5553 ( ) ( ) ( ) 5554 m ( A ) * n ( B ) = m ( C ) 5555 ( ) ( ) ( ) 5556 5557 */ 5558 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5559 { 5560 PetscErrorCode ierr; 5561 Mat At,Bt,Ct; 5562 5563 PetscFunctionBegin; 5564 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5565 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5566 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5567 ierr = MatDestroy(&At);CHKERRQ(ierr); 5568 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5569 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5570 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5571 PetscFunctionReturn(0); 5572 } 5573 5574 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5575 { 5576 PetscErrorCode ierr; 5577 PetscInt m=A->rmap->n,n=B->cmap->n; 5578 Mat Cmat; 5579 5580 PetscFunctionBegin; 5581 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5582 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5583 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5584 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5585 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5586 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5587 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5588 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5589 5590 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5591 5592 *C = Cmat; 5593 PetscFunctionReturn(0); 5594 } 5595 5596 /* ----------------------------------------------------------------*/ 5597 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5598 { 5599 PetscErrorCode ierr; 5600 5601 PetscFunctionBegin; 5602 if (scall == MAT_INITIAL_MATRIX) { 5603 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5604 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5605 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5606 } 5607 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5608 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5609 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5610 PetscFunctionReturn(0); 5611 } 5612 5613 /*MC 5614 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5615 5616 Options Database Keys: 5617 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5618 5619 Level: beginner 5620 5621 .seealso: MatCreateAIJ() 5622 M*/ 5623 5624 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5625 { 5626 Mat_MPIAIJ *b; 5627 PetscErrorCode ierr; 5628 PetscMPIInt size; 5629 5630 PetscFunctionBegin; 5631 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5632 5633 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5634 B->data = (void*)b; 5635 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5636 B->assembled = PETSC_FALSE; 5637 B->insertmode = NOT_SET_VALUES; 5638 b->size = size; 5639 5640 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5641 5642 /* build cache for off array entries formed */ 5643 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5644 5645 b->donotstash = PETSC_FALSE; 5646 b->colmap = 0; 5647 b->garray = 0; 5648 b->roworiented = PETSC_TRUE; 5649 5650 /* stuff used for matrix vector multiply */ 5651 b->lvec = NULL; 5652 b->Mvctx = NULL; 5653 5654 /* stuff for MatGetRow() */ 5655 b->rowindices = 0; 5656 b->rowvalues = 0; 5657 b->getrowactive = PETSC_FALSE; 5658 5659 /* flexible pointer used in CUSP/CUSPARSE classes */ 5660 b->spptr = NULL; 5661 5662 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5663 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5664 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5665 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5666 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5667 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5668 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5669 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5670 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5671 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr); 5672 #if defined(PETSC_HAVE_MKL_SPARSE) 5673 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5674 #endif 5675 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5676 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5677 #if defined(PETSC_HAVE_ELEMENTAL) 5678 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5679 #endif 5680 #if defined(PETSC_HAVE_HYPRE) 5681 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5682 #endif 5683 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr); 5684 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr); 5685 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5686 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5687 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5688 #if defined(PETSC_HAVE_HYPRE) 5689 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5690 #endif 5691 ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr); 5692 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5693 PetscFunctionReturn(0); 5694 } 5695 5696 /*@C 5697 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5698 and "off-diagonal" part of the matrix in CSR format. 5699 5700 Collective on MPI_Comm 5701 5702 Input Parameters: 5703 + comm - MPI communicator 5704 . m - number of local rows (Cannot be PETSC_DECIDE) 5705 . n - This value should be the same as the local size used in creating the 5706 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5707 calculated if N is given) For square matrices n is almost always m. 5708 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5709 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5710 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 5711 . j - column indices 5712 . a - matrix values 5713 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 5714 . oj - column indices 5715 - oa - matrix values 5716 5717 Output Parameter: 5718 . mat - the matrix 5719 5720 Level: advanced 5721 5722 Notes: 5723 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5724 must free the arrays once the matrix has been destroyed and not before. 
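   As a minimal sketch (an illustration; the index conventions are inferred from the calls below that
   build the two sequential blocks: the diagonal block is created with n columns, so j is local to the
   diagonal block, while the off-diagonal block is created with N columns, so oj holds global column
   indices), a 4x4 matrix on 2 processes, where process 0 owns rows 0-1 and columns 0-1 of the diagonal
   block, splits as
.vb
      1  2 | 0  3       process 0:  i  = {0,2,3}   j  = {0,1,1}   a  = {1,2,4}
      0  4 | 5  0                   oi = {0,1,2}   oj = {3,2}     oa = {3,5}
      -----------
      6  0 | 7  0       process 1:  i  = {0,1,2}   j  = {0,1}     a  = {7,9}
      0  8 | 0  9                   oi = {0,1,2}   oj = {0,1}     oa = {6,8}
.ve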
5725 5726 The i and j indices are 0 based 5727 5728 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5729 5730 This sets local rows and cannot be used to set off-processor values. 5731 5732 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5733 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5734 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5735 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5736 keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5737 communication if it is known that only local entries will be set. 5738 5739 .keywords: matrix, aij, compressed row, sparse, parallel 5740 5741 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5742 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5743 @*/ 5744 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5745 { 5746 PetscErrorCode ierr; 5747 Mat_MPIAIJ *maij; 5748 5749 PetscFunctionBegin; 5750 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5751 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5752 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5753 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5754 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5755 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5756 maij = (Mat_MPIAIJ*) (*mat)->data; 5757 5758 (*mat)->preallocated = PETSC_TRUE; 5759 5760 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5761 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5762 5763 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5764 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5765 5766 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5767 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5768 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5769 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5770 5771 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5772 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5773 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5774 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5775 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5776 PetscFunctionReturn(0); 5777 } 5778 5779 /* 5780 Special version for direct calls from Fortran 5781 */ 5782 #include <petsc/private/fortranimpl.h> 5783 5784 /* Change these macros so can be used in void function */ 5785 #undef CHKERRQ 5786 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5787 #undef SETERRQ2 5788 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5789 #undef SETERRQ3 5790 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5791 #undef SETERRQ 5792 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5793 
/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm,n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Variables required by the inlined MatSetValues_SeqAIJ_{A,B}_Private() macros */
    Mat         A      = aij->A;
    Mat_SeqAIJ  *a     = (Mat_SeqAIJ*)A->data;
    PetscInt    *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar   *aa    = a->a;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B      = aij->B;
    Mat_SeqAIJ  *b     = (Mat_SeqAIJ*)B->data;
    PetscInt    *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar   *ba    = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
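              /* colmap stores (local column in B)+1, so col == -1 here means the global
                 column in[j] is not yet present in the off-diagonal block; if new nonzero
                 locations are allowed, the matrix is disassembled below (B reverts to
                 global column indices) so that the entry can be inserted */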
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before ap2 is recomputed from it */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}