#include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJMKL, and MATAIJCRL;
   the type also automatically switches over to using inode routines when enough inodes exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/
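/*
   Example usage (a minimal sketch, not compiled as part of this file): creating an AIJ matrix
   and, as recommended above, calling both preallocation routines so the same code works for
   one or many processes. The global size N and the per-row nonzero estimates (5 diagonal,
   2 off-diagonal) are illustrative assumptions only.

     Mat            A;
     PetscErrorCode ierr;

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,N,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/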
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)Y->data;

  PetscFunctionBegin;
  if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) {
    ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
  } else {
    ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
  PetscErrorCode ierr;
  PetscInt       i,rstart,nrows,*rows;

  PetscFunctionBegin;
  *zrows = NULL;
  ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
  ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<nrows; i++) rows[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)A->data;
  PetscInt       i,n,*garray = aij->garray;
  Mat_SeqAIJ     *a_aij = (Mat_SeqAIJ*) aij->A->data;
  Mat_SeqAIJ     *b_aij = (Mat_SeqAIJ*) aij->B->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
    }
  } else if (type == NORM_1) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
    }
  } else if (type == NORM_INFINITY) {
    for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
      work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    }
    for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
      work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  IS             sis,gis;
  PetscErrorCode ierr;
  const PetscInt *isis,*igis;
  PetscInt       n,*iis,nsis,ngis,rstart,i;

  PetscFunctionBegin;
  ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
  ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
  ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
  ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
  ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);

  ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
  ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr);
  n    = ngis + nsis;
  ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
  for (i=0; i<n; i++) iis[i] += rstart;
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);

  ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
  ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
  ierr = ISDestroy(&sis);CHKERRQ(ierr);
  ierr = ISDestroy(&gis);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Distributes a SeqAIJ matrix across a set of processes. Code stolen from
    MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type.

    Only for square matrices

    Used by a preconditioner, hence PETSC_EXTERN
*/
PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat)
{
  PetscMPIInt    rank,size;
  PetscInt       *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2];
  PetscErrorCode ierr;
  Mat            mat;
  Mat_SeqAIJ     *gmata;
  PetscMPIInt    tag;
  MPI_Status     status;
  PetscBool      aij;
  MatScalar      *gmataa,*ao,*ad,*gmataarestore=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr);
    if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name);
  }
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
    ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
    ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr);
    ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr);
    ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr);
    ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr);
    ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr);
    ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

    rowners[0] = 0;
    for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
    rstart = rowners[rank];
    rend   = rowners[rank+1];
    ierr   = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      gmata = (Mat_SeqAIJ*) gmat->data;
      /* send row lengths to all processors */
      for (i=0; i<m; i++) dlens[i] = gmata->ilen[i];
      for (i=1; i<size; i++) {
        ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmata->j[jj] < rstart) ld[i]++;
          if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* send column indices to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
        ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
      }

      /* send numerical values to other processes */
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      gmataa = gmata->a;
      gmataj = gmata->j;

    } else {
      /* receive row lengths */
      ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* receive column indices */
      ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr);
      ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
      /* determine number diagonal and off-diagonal counts */
      ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr);
      ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr);
      jj   = 0;
      for (i=0; i<m; i++) {
        for (j=0; j<dlens[i]; j++) {
          if (gmataj[jj] < rstart) ld[i]++;
          if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++;
          jj++;
        }
      }
      /* receive numerical values */
      ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* set preallocation */
    for (i=0; i<m; i++) {
      dlens[i] -= olens[i];
    }
    ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr);

    for (i=0; i<m; i++) {
      dlens[i] += olens[i];
    }
    cnt = 0;
    for (i=0; i<m; i++) {
      row  = rstart + i;
      ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr);
      cnt += dlens[i];
    }
    if (rank) {
      ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr);
    }
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
    ierr = PetscFree(rowners);CHKERRQ(ierr);

    ((Mat_MPIAIJ*)(mat->data))->ld = ld;

    *inmat = mat;
  } else { /* column indices are already set; only need to move over numerical values from process 0 */
    Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data;
    Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data;
    mat  = *inmat;
    ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr);
    if (!rank) {
      /* send numerical values to other processes */
      gmata  = (Mat_SeqAIJ*) gmat->data;
      ierr   = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr);
      gmataa = gmata->a;
      for (i=1; i<size; i++) {
        nz   = gmata->i[rowners[i+1]]-gmata->i[rowners[i]];
        ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr);
      }
      nz = gmata->i[rowners[1]]-gmata->i[rowners[0]];
    } else {
      /* receive numerical values from process 0*/
      nz   = Ad->nz + Ao->nz;
      ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa;
      ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr);
    }
    /* transfer numerical values into the diagonal A and off diagonal B parts of mat */
    ld = ((Mat_MPIAIJ*)(mat->data))->ld;
    ad = Ad->a;
    ao = Ao->a;
    if (mat->rmap->n) {
      i  = 0;
      nz = ld[i];                 ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    for (i=1; i<mat->rmap->n; i++) {
      nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz;
      nz = Ad->i[i+1] - Ad->i[i];                   ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz;
    }
    i--;
    if (mat->rmap->n) {
      nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr);
    }
    if (rank) {
      ierr = PetscFree(gmataarestore);CHKERRQ(ierr);
    }
  }
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is defined this is scalable,
  at a slightly higher hash-table cost; without it it is not scalable (each
  process holds an order N integer array) but access is fast.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       n = aij->B->cmap->n,i;

  PetscFunctionBegin;
  if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  for (i=0; i<n; i++) {
    ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
#endif
  PetscFunctionReturn(0);
}
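/*
   A minimal sketch (illustrative only, not used by the library) of how the column map built
   above is consulted, mirroring the lookups in MatSetValues_MPIAIJ() below. Entries are stored
   shifted by one so that a zero result (hence -1 after the shift) means "this global column does
   not appear in the off-diagonal part B on this process". The names gcol and lcol are hypothetical.

     PetscInt lcol;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = aij->colmap[gcol] - 1;
   #endif
     if (lcol < 0) { ... gcol is not a ghost column here ... }
*/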
#define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col;\
    while (high1-low1 > 5) { \
      t = (low1+high1)/2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i=low1; _i<high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) ap1[_i] += value; \
        else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
    if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
    N = nrow1++ - 1; a->nz++; high1++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp1[ii+1] = rp1[ii]; \
      ap1[ii+1] = ap1[ii]; \
    } \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++;\
    a_noinsert: ; \
    ailen[row] = nrow1; \
}

#define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
{ \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2-low2 > 5) { \
      t = (low2+high2)/2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i=low2; _i<high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) ap2[_i] += value; \
        else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
    if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
    N = nrow2++ - 1; b->nz++; high2++; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp2[ii+1] = rp2[ii]; \
      ap2[ii+1] = ap2[ii]; \
    } \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
    b_noinsert: ; \
    bilen[row] = nrow2; \
}

PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
  PetscErrorCode ierr;
  PetscInt       l,*garray = mat->garray,diag;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr);
  row  = row - diag;
  for (l=0; l<b->i[row+1]-b->i[row]; l++) {
    if (garray[b->j[b->i[row]+l]] > diag) break;
  }
  ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr);

  /* diagonal part */
  ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr);

  /* right of diagonal part */
  ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
  PetscBool      roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat        A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
  PetscInt   *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
  MatScalar  *aa = a->a;
  PetscBool  ignorezeroentries = a->ignorezeroentries;
  Mat        B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data;
  PetscInt   *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
  MatScalar  *ba = b->a;

  PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
  PetscInt  nonew;
  MatScalar *ap1,*ap2;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j=0; j<n; j++) {
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
          MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!aij->colmap) {
              ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ*)B->data;
              bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
  PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col  = idxn[j] - cstart;
          ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!aij->colmap) {
            ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
          else {
            ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec);

PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)aij->A->data;
  PetscErrorCode ierr;
  PetscMPIInt    n;
  PetscInt       i,j,rstart,ncols,flg;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscScalar    *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n; ) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);

        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);

  aij->rowvalues = 0;

  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ *) A->data;
  PetscInt       *lrows;
  PetscInt       r, len;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
  }
  /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */
    PetscBool cong;
    ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr);
    if (cong) A->congruentlayouts = 1;
    else      A->congruentlayouts = 0;
  }
  if ((diag != 0.0) && A->congruentlayouts) {
    ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
    if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,r,m,p = 0,len = 0;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
  const PetscInt    *aj, *ii,*ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  ii = aij->i;
  for (i=0; i<len; i++) {
    ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];

      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[*ridx] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i=0; i<m; i++) {
      n  = ii[i+1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij->a + ii[i];
      for (j=0; j<n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa*xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;
  PetscBool      merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* added in yy until the next line, */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f)
{
  MPI_Comm       comm;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
  Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
  IS             Me,Notme;
  PetscErrorCode ierr;
  PetscInt       M,N,first,last,*notme,i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
  ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr);
  if (!*f) PetscFunctionReturn(0);
  ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) PetscFunctionReturn(0);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
  ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
  for (i=0; i<first; i++) notme[i] = i;
  for (i=last; i<M; i++) notme[i-last+first] = i;
  ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
  ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
  ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
  Aoff = Aoffs[0];
  ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
  Boff = Boffs[0];
  ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
  ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
  ierr = ISDestroy(&Me);CHKERRQ(ierr);
  ierr = ISDestroy(&Notme);CHKERRQ(ierr);
  ierr = PetscFree(notme);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(aij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(aij->ld);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  Mat_SeqAIJ     *A   = (Mat_SeqAIJ*)aij->A->data;
  Mat_SeqAIJ     *B   = (Mat_SeqAIJ*)aij->B->data;
  PetscErrorCode ierr;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  int            fd;
  PetscInt       nz,header[4],*row_lengths,*range=0,rlen,i;
  PetscInt       nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0;
  PetscScalar    *column_values;
  PetscInt       message_count,flowcontrolcount;
  FILE           *file;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = A->nz + B->nz;
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    rlen  = mat->rmap->n;
    range = mat->rmap->range;
    for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]);
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    rlen = mat->rmap->n;
  }

  /* load up the local row counts */
  ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr);
  for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];

  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      rlen = range[i+1] - range[i];
      ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(row_lengths);CHKERRQ(ierr);

  /* load up the local column indices */
  nzmax = nz; /* process 0 needs as much space as the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      column_indices[cnt++] = col;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart;
    for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column indices to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the local column values */
  ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<mat->rmap->n; i++) {
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      column_values[cnt++] = B->a[j];
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k];
    for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j];
  }
  if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax);
      ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs));
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = aij->rank,size = aij->size;
  PetscBool         isdraw,iascii,isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscBool inodes;

      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      if (!inodes) {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n",
                                                  rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr);
      }
      ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount,inodelimit,*inodes;
      ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
      if (inodes) {
        ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
      } else {
        ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
    } else {
      ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
    }
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor */
    Mat        A;
    Mat_SeqAIJ *Aloc;
    PetscInt   M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct;
    MatScalar  *a;

    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqAIJ*)aij->A->data;
    m    = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i];
    }
    aj = Aloc->j;
    for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart;

    /* copy over the B part */
    Aloc = (Mat_SeqAIJ*)aij->B->data;
    m    = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    row  = mat->rmap->rstart;
    ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr);
    ct   = cols;
    for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]];
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr);
      row++;
      a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i];
    }
    ierr = PetscFree(ct);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  if (iascii || isdraw || isbinary || issocket) {
    ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;
  PetscBool      hasop;

1423 PetscFunctionBegin; 1424 if (flag == SOR_APPLY_UPPER) { 1425 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1426 PetscFunctionReturn(0); 1427 } 1428 1429 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1430 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1431 } 1432 1433 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1434 if (flag & SOR_ZERO_INITIAL_GUESS) { 1435 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1436 its--; 1437 } 1438 1439 while (its--) { 1440 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1441 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1442 1443 /* update rhs: bb1 = bb - B*x */ 1444 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1445 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1446 1447 /* local sweep */ 1448 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1449 } 1450 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1451 if (flag & SOR_ZERO_INITIAL_GUESS) { 1452 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1453 its--; 1454 } 1455 while (its--) { 1456 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1457 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1458 1459 /* update rhs: bb1 = bb - B*x */ 1460 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1461 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1462 1463 /* local sweep */ 1464 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1465 } 1466 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1467 if (flag & SOR_ZERO_INITIAL_GUESS) { 1468 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1469 its--; 1470 } 1471 while (its--) { 1472 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1473 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1474 1475 /* update rhs: bb1 = bb - B*x */ 1476 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1477 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1478 1479 /* local sweep */ 1480 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1481 } 1482 } else if (flag & SOR_EISENSTAT) { 1483 Vec xx1; 1484 1485 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1487 1488 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1489 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1490 if (!mat->diag) { 1491 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1492 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1493 } 1494 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1495 if (hasop) { 1496 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1497 } else { 1498 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1499 } 1500 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1501 1502 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1503 1504 /* local sweep */ 1505 ierr = 
(*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1506 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1507 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1508 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1509 1510 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1511 1512 matin->factorerrortype = mat->A->factorerrortype; 1513 PetscFunctionReturn(0); 1514 } 1515 1516 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1517 { 1518 Mat aA,aB,Aperm; 1519 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1520 PetscScalar *aa,*ba; 1521 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1522 PetscSF rowsf,sf; 1523 IS parcolp = NULL; 1524 PetscBool done; 1525 PetscErrorCode ierr; 1526 1527 PetscFunctionBegin; 1528 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1529 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1530 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1531 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1532 1533 /* Invert row permutation to find out where my rows should go */ 1534 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1535 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1536 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1537 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1538 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1539 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1540 1541 /* Invert column permutation to find out where my columns should go */ 1542 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1543 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1544 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1545 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1546 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1547 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1548 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1549 1550 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1551 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1552 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1553 1554 /* Find out where my gcols should go */ 1555 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1556 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1557 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1558 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1559 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1560 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1561 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1562 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1563 1564 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1565 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1566 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1567 for (i=0; i<m; i++) { 1568 PetscInt row = rdest[i],rowner; 1569 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1570 for (j=ai[i]; j<ai[i+1]; j++) { 1571 PetscInt cowner,col = cdest[aj[j]]; 1572 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to 
eliminate this search */ 1573 if (rowner == cowner) dnnz[i]++; 1574 else onnz[i]++; 1575 } 1576 for (j=bi[i]; j<bi[i+1]; j++) { 1577 PetscInt cowner,col = gcdest[bj[j]]; 1578 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1579 if (rowner == cowner) dnnz[i]++; 1580 else onnz[i]++; 1581 } 1582 } 1583 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1584 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1585 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1586 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1587 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1588 1589 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1590 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1591 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1592 for (i=0; i<m; i++) { 1593 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1594 PetscInt j0,rowlen; 1595 rowlen = ai[i+1] - ai[i]; 1596 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1597 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1598 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1599 } 1600 rowlen = bi[i+1] - bi[i]; 1601 for (j0=j=0; j<rowlen; j0=j) { 1602 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1603 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1604 } 1605 } 1606 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1607 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1608 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1609 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1610 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1611 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1612 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1613 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1614 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1615 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1616 *B = Aperm; 1617 PetscFunctionReturn(0); 1618 } 1619 1620 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1621 { 1622 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1623 PetscErrorCode ierr; 1624 1625 PetscFunctionBegin; 1626 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1627 if (ghosts) *ghosts = aij->garray; 1628 PetscFunctionReturn(0); 1629 } 1630 1631 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1632 { 1633 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1634 Mat A = mat->A,B = mat->B; 1635 PetscErrorCode ierr; 1636 PetscReal isend[5],irecv[5]; 1637 1638 PetscFunctionBegin; 1639 info->block_size = 1.0; 1640 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1641 1642 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1643 isend[3] = info->memory; isend[4] = info->mallocs; 1644 1645 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1646 1647 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1648 isend[3] += info->memory; isend[4] += info->mallocs; 1649 if (flag == MAT_LOCAL) { 1650 info->nz_used = isend[0]; 1651 info->nz_allocated = isend[1]; 1652 info->nz_unneeded = isend[2]; 1653 info->memory = isend[3]; 1654 info->mallocs = 
isend[4]; 1655 } else if (flag == MAT_GLOBAL_MAX) { 1656 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1657 1658 info->nz_used = irecv[0]; 1659 info->nz_allocated = irecv[1]; 1660 info->nz_unneeded = irecv[2]; 1661 info->memory = irecv[3]; 1662 info->mallocs = irecv[4]; 1663 } else if (flag == MAT_GLOBAL_SUM) { 1664 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1665 1666 info->nz_used = irecv[0]; 1667 info->nz_allocated = irecv[1]; 1668 info->nz_unneeded = irecv[2]; 1669 info->memory = irecv[3]; 1670 info->mallocs = irecv[4]; 1671 } 1672 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1673 info->fill_ratio_needed = 0; 1674 info->factor_mallocs = 0; 1675 PetscFunctionReturn(0); 1676 } 1677 1678 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1679 { 1680 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1681 PetscErrorCode ierr; 1682 1683 PetscFunctionBegin; 1684 switch (op) { 1685 case MAT_NEW_NONZERO_LOCATIONS: 1686 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1687 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1688 case MAT_KEEP_NONZERO_PATTERN: 1689 case MAT_NEW_NONZERO_LOCATION_ERR: 1690 case MAT_USE_INODES: 1691 case MAT_IGNORE_ZERO_ENTRIES: 1692 MatCheckPreallocated(A,1); 1693 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1694 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1695 break; 1696 case MAT_ROW_ORIENTED: 1697 MatCheckPreallocated(A,1); 1698 a->roworiented = flg; 1699 1700 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1701 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1702 break; 1703 case MAT_NEW_DIAGONALS: 1704 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1705 break; 1706 case MAT_IGNORE_OFF_PROC_ENTRIES: 1707 a->donotstash = flg; 1708 break; 1709 case MAT_SPD: 1710 A->spd_set = PETSC_TRUE; 1711 A->spd = flg; 1712 if (flg) { 1713 A->symmetric = PETSC_TRUE; 1714 A->structurally_symmetric = PETSC_TRUE; 1715 A->symmetric_set = PETSC_TRUE; 1716 A->structurally_symmetric_set = PETSC_TRUE; 1717 } 1718 break; 1719 case MAT_SYMMETRIC: 1720 MatCheckPreallocated(A,1); 1721 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1722 break; 1723 case MAT_STRUCTURALLY_SYMMETRIC: 1724 MatCheckPreallocated(A,1); 1725 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1726 break; 1727 case MAT_HERMITIAN: 1728 MatCheckPreallocated(A,1); 1729 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1730 break; 1731 case MAT_SYMMETRY_ETERNAL: 1732 MatCheckPreallocated(A,1); 1733 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1734 break; 1735 case MAT_SUBMAT_SINGLEIS: 1736 A->submat_singleis = flg; 1737 break; 1738 case MAT_STRUCTURE_ONLY: 1739 /* The option is handled directly by MatSetOption() */ 1740 break; 1741 default: 1742 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1743 } 1744 PetscFunctionReturn(0); 1745 } 1746 1747 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1748 { 1749 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1750 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1751 PetscErrorCode ierr; 1752 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1753 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1754 PetscInt *cmap,*idx_p; 1755 1756 PetscFunctionBegin; 1757 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1758 mat->getrowactive = PETSC_TRUE; 1759 1760 if (!mat->rowvalues && 
(idx || v)) { 1761 /* 1762 allocate enough space to hold information from the longest row. 1763 */ 1764 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1765 PetscInt max = 1,tmp; 1766 for (i=0; i<matin->rmap->n; i++) { 1767 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1768 if (max < tmp) max = tmp; 1769 } 1770 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1771 } 1772 1773 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1774 lrow = row - rstart; 1775 1776 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1777 if (!v) {pvA = 0; pvB = 0;} 1778 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1779 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1780 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1781 nztot = nzA + nzB; 1782 1783 cmap = mat->garray; 1784 if (v || idx) { 1785 if (nztot) { 1786 /* Sort by increasing column numbers, assuming A and B already sorted */ 1787 PetscInt imark = -1; 1788 if (v) { 1789 *v = v_p = mat->rowvalues; 1790 for (i=0; i<nzB; i++) { 1791 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1792 else break; 1793 } 1794 imark = i; 1795 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1796 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1797 } 1798 if (idx) { 1799 *idx = idx_p = mat->rowindices; 1800 if (imark > -1) { 1801 for (i=0; i<imark; i++) { 1802 idx_p[i] = cmap[cworkB[i]]; 1803 } 1804 } else { 1805 for (i=0; i<nzB; i++) { 1806 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1807 else break; 1808 } 1809 imark = i; 1810 } 1811 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1812 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1813 } 1814 } else { 1815 if (idx) *idx = 0; 1816 if (v) *v = 0; 1817 } 1818 } 1819 *nz = nztot; 1820 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1821 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1822 PetscFunctionReturn(0); 1823 } 1824 1825 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1826 { 1827 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1828 1829 PetscFunctionBegin; 1830 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1831 aij->getrowactive = PETSC_FALSE; 1832 PetscFunctionReturn(0); 1833 } 1834 1835 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1836 { 1837 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1838 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1839 PetscErrorCode ierr; 1840 PetscInt i,j,cstart = mat->cmap->rstart; 1841 PetscReal sum = 0.0; 1842 MatScalar *v; 1843 1844 PetscFunctionBegin; 1845 if (aij->size == 1) { 1846 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1847 } else { 1848 if (type == NORM_FROBENIUS) { 1849 v = amat->a; 1850 for (i=0; i<amat->nz; i++) { 1851 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1852 } 1853 v = bmat->a; 1854 for (i=0; i<bmat->nz; i++) { 1855 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1856 } 1857 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1858 *norm = PetscSqrtReal(*norm); 1859 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1860 } else if (type == NORM_1) { /* max column norm */ 1861 PetscReal *tmp,*tmp2; 1862 PetscInt *jj,*garray = aij->garray; 1863 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1864 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1865 *norm = 0.0; 1866 v = amat->a; jj = amat->j; 1867 for (j=0; j<amat->nz; j++) { 1868 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1869 } 1870 v = bmat->a; jj = bmat->j; 1871 for (j=0; j<bmat->nz; j++) { 1872 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1873 } 1874 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1875 for (j=0; j<mat->cmap->N; j++) { 1876 if (tmp2[j] > *norm) *norm = tmp2[j]; 1877 } 1878 ierr = PetscFree(tmp);CHKERRQ(ierr); 1879 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1880 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1881 } else if (type == NORM_INFINITY) { /* max row norm */ 1882 PetscReal ntemp = 0.0; 1883 for (j=0; j<aij->A->rmap->n; j++) { 1884 v = amat->a + amat->i[j]; 1885 sum = 0.0; 1886 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1887 sum += PetscAbsScalar(*v); v++; 1888 } 1889 v = bmat->a + bmat->i[j]; 1890 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1891 sum += PetscAbsScalar(*v); v++; 1892 } 1893 if (sum > ntemp) ntemp = sum; 1894 } 1895 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1896 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1897 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1898 } 1899 PetscFunctionReturn(0); 1900 } 1901 1902 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1903 { 1904 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1905 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1906 PetscErrorCode ierr; 1907 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1908 PetscInt cstart = A->cmap->rstart,ncol; 1909 Mat B; 1910 MatScalar *array; 1911 1912 PetscFunctionBegin; 1913 if (reuse == MAT_INPLACE_MATRIX && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1914 1915 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1916 ai = Aloc->i; aj = Aloc->j; 1917 bi = Bloc->i; bj = Bloc->j; 1918 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1919 PetscInt *d_nnz,*g_nnz,*o_nnz; 1920 PetscSFNode *oloc; 1921 PETSC_UNUSED PetscSF sf; 1922 1923 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1924 /* compute d_nnz for preallocation */ 1925 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1926 for (i=0; i<ai[ma]; i++) { 1927 d_nnz[aj[i]]++; 1928 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1929 } 1930 /* compute local off-diagonal contributions */ 1931 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1932 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1933 /* map those to global */ 1934 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1935 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1936 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1937 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1938 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1939 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1940 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1941 1942 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1943 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1944 
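  /*
     B is the transpose being assembled: its row layout is A's column layout and
     its column layout is A's row layout (local sizes A->cmap->n by A->rmap->n,
     global N by M).  The d_nnz/o_nnz counts computed above give an exact
     per-row preallocation, so the MatSetValues() calls further down should not
     trigger additional mallocs.

     Illustrative caller-level usage; not part of this file, assumes A already
     exists.  The second call reuses the nonzero structure built by the first:

        Mat At;
        ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
        ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);
        ierr = MatDestroy(&At);CHKERRQ(ierr);
  */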
ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1945 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1946 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1947 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1948 } else { 1949 B = *matout; 1950 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1951 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1952 } 1953 1954 /* copy over the A part */ 1955 array = Aloc->a; 1956 row = A->rmap->rstart; 1957 for (i=0; i<ma; i++) { 1958 ncol = ai[i+1]-ai[i]; 1959 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1960 row++; 1961 array += ncol; aj += ncol; 1962 } 1963 aj = Aloc->j; 1964 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 1965 1966 /* copy over the B part */ 1967 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 1968 array = Bloc->a; 1969 row = A->rmap->rstart; 1970 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1971 cols_tmp = cols; 1972 for (i=0; i<mb; i++) { 1973 ncol = bi[i+1]-bi[i]; 1974 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 1975 row++; 1976 array += ncol; cols_tmp += ncol; 1977 } 1978 ierr = PetscFree(cols);CHKERRQ(ierr); 1979 1980 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1981 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1982 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1983 *matout = B; 1984 } else { 1985 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 1986 } 1987 PetscFunctionReturn(0); 1988 } 1989 1990 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 1991 { 1992 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1993 Mat a = aij->A,b = aij->B; 1994 PetscErrorCode ierr; 1995 PetscInt s1,s2,s3; 1996 1997 PetscFunctionBegin; 1998 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 1999 if (rr) { 2000 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2001 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2002 /* Overlap communication with computation. 
*/ 2003 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2004 } 2005 if (ll) { 2006 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2007 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2008 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2009 } 2010 /* scale the diagonal block */ 2011 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2012 2013 if (rr) { 2014 /* Do a scatter end and then right scale the off-diagonal block */ 2015 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2016 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2017 } 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2022 { 2023 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2024 PetscErrorCode ierr; 2025 2026 PetscFunctionBegin; 2027 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2028 PetscFunctionReturn(0); 2029 } 2030 2031 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2032 { 2033 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2034 Mat a,b,c,d; 2035 PetscBool flg; 2036 PetscErrorCode ierr; 2037 2038 PetscFunctionBegin; 2039 a = matA->A; b = matA->B; 2040 c = matB->A; d = matB->B; 2041 2042 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2043 if (flg) { 2044 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2045 } 2046 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2051 { 2052 PetscErrorCode ierr; 2053 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2054 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2055 2056 PetscFunctionBegin; 2057 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2058 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2059 /* because of the column compression in the off-processor part of the matrix a->B, 2060 the number of columns in a->B and b->B may be different, hence we cannot call 2061 the MatCopy() directly on the two parts. If need be, we can provide a more 2062 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2063 then copying the submatrices */ 2064 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2065 } else { 2066 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2067 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2068 } 2069 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2070 PetscFunctionReturn(0); 2071 } 2072 2073 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2074 { 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 /* 2083 Computes the number of nonzeros per row needed for preallocation when X and Y 2084 have different nonzero structure. 
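   For example, if on some row X has (global) columns {1,5} and Y has columns
   {2,5,9}, the merged row has columns {1,2,5,9} and nnz for that row is 4:
   the loop below walks both sorted column lists, counts entries that appear
   only in Y, counts each X entry once, and skips a Y entry that duplicates an
   X entry.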
2085 */ 2086 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2087 { 2088 PetscInt i,j,k,nzx,nzy; 2089 2090 PetscFunctionBegin; 2091 /* Set the number of nonzeros in the new matrix */ 2092 for (i=0; i<m; i++) { 2093 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2094 nzx = xi[i+1] - xi[i]; 2095 nzy = yi[i+1] - yi[i]; 2096 nnz[i] = 0; 2097 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2098 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2099 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2100 nnz[i]++; 2101 } 2102 for (; k<nzy; k++) nnz[i]++; 2103 } 2104 PetscFunctionReturn(0); 2105 } 2106 2107 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2108 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2109 { 2110 PetscErrorCode ierr; 2111 PetscInt m = Y->rmap->N; 2112 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2113 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2114 2115 PetscFunctionBegin; 2116 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2117 PetscFunctionReturn(0); 2118 } 2119 2120 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2121 { 2122 PetscErrorCode ierr; 2123 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2124 PetscBLASInt bnz,one=1; 2125 Mat_SeqAIJ *x,*y; 2126 2127 PetscFunctionBegin; 2128 if (str == SAME_NONZERO_PATTERN) { 2129 PetscScalar alpha = a; 2130 x = (Mat_SeqAIJ*)xx->A->data; 2131 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2132 y = (Mat_SeqAIJ*)yy->A->data; 2133 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2134 x = (Mat_SeqAIJ*)xx->B->data; 2135 y = (Mat_SeqAIJ*)yy->B->data; 2136 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2137 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2138 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2139 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2140 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2141 } else { 2142 Mat B; 2143 PetscInt *nnz_d,*nnz_o; 2144 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2145 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2146 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2147 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2148 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2149 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2150 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2151 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2152 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2153 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2154 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2155 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2156 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2157 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2158 } 2159 PetscFunctionReturn(0); 2160 } 2161 2162 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2163 2164 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2165 { 2166 #if defined(PETSC_USE_COMPLEX) 2167 PetscErrorCode ierr; 2168 Mat_MPIAIJ *aij = 
(Mat_MPIAIJ*)mat->data; 2169 2170 PetscFunctionBegin; 2171 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2172 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2173 #else 2174 PetscFunctionBegin; 2175 #endif 2176 PetscFunctionReturn(0); 2177 } 2178 2179 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2180 { 2181 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2182 PetscErrorCode ierr; 2183 2184 PetscFunctionBegin; 2185 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2186 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2187 PetscFunctionReturn(0); 2188 } 2189 2190 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2191 { 2192 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2193 PetscErrorCode ierr; 2194 2195 PetscFunctionBegin; 2196 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2197 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2198 PetscFunctionReturn(0); 2199 } 2200 2201 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2204 PetscErrorCode ierr; 2205 PetscInt i,*idxb = 0; 2206 PetscScalar *va,*vb; 2207 Vec vtmp; 2208 2209 PetscFunctionBegin; 2210 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2211 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2212 if (idx) { 2213 for (i=0; i<A->rmap->n; i++) { 2214 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2215 } 2216 } 2217 2218 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2219 if (idx) { 2220 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2221 } 2222 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2223 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2224 2225 for (i=0; i<A->rmap->n; i++) { 2226 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2227 va[i] = vb[i]; 2228 if (idx) idx[i] = a->garray[idxb[i]]; 2229 } 2230 } 2231 2232 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2233 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2234 ierr = PetscFree(idxb);CHKERRQ(ierr); 2235 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2236 PetscFunctionReturn(0); 2237 } 2238 2239 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2240 { 2241 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2242 PetscErrorCode ierr; 2243 PetscInt i,*idxb = 0; 2244 PetscScalar *va,*vb; 2245 Vec vtmp; 2246 2247 PetscFunctionBegin; 2248 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2249 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2250 if (idx) { 2251 for (i=0; i<A->cmap->n; i++) { 2252 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2253 } 2254 } 2255 2256 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2257 if (idx) { 2258 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2259 } 2260 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2261 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2262 2263 for (i=0; i<A->rmap->n; i++) { 2264 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2265 va[i] = vb[i]; 2266 if (idx) idx[i] = a->garray[idxb[i]]; 2267 } 2268 } 2269 2270 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2271 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2272 ierr = PetscFree(idxb);CHKERRQ(ierr); 2273 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2274 PetscFunctionReturn(0); 2275 } 2276 2277 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2278 { 2279 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2280 PetscInt n = A->rmap->n; 2281 PetscInt cstart = A->cmap->rstart; 2282 PetscInt *cmap = mat->garray; 2283 PetscInt *diagIdx, *offdiagIdx; 2284 Vec diagV, offdiagV; 2285 PetscScalar *a, *diagA, *offdiagA; 2286 PetscInt r; 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 
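  /*
     The row-wise minimum is computed blockwise: MatGetRowMin() on the local
     diagonal block A and on the off-diagonal block B each produce one candidate
     per local row, and the loop below keeps whichever candidate has the smaller
     magnitude (PetscAbsScalar), mapping indices coming from B back to global
     column numbers through garray.

     Illustrative caller-level usage; not part of this file, assumes A is an
     assembled MATMPIAIJ matrix:

        Vec      rmin;
        PetscInt m,*loc;
        ierr = MatCreateVecs(A,NULL,&rmin);CHKERRQ(ierr);
        ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
        ierr = PetscMalloc1(m,&loc);CHKERRQ(ierr);
        ierr = MatGetRowMin(A,rmin,loc);CHKERRQ(ierr);
        ierr = PetscFree(loc);CHKERRQ(ierr);
        ierr = VecDestroy(&rmin);CHKERRQ(ierr);
  */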
ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2291 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2292 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2293 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2294 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2295 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2296 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2297 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2298 for (r = 0; r < n; ++r) { 2299 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2300 a[r] = diagA[r]; 2301 idx[r] = cstart + diagIdx[r]; 2302 } else { 2303 a[r] = offdiagA[r]; 2304 idx[r] = cmap[offdiagIdx[r]]; 2305 } 2306 } 2307 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2308 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2309 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2310 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2311 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2312 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2313 PetscFunctionReturn(0); 2314 } 2315 2316 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2317 { 2318 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2319 PetscInt n = A->rmap->n; 2320 PetscInt cstart = A->cmap->rstart; 2321 PetscInt *cmap = mat->garray; 2322 PetscInt *diagIdx, *offdiagIdx; 2323 Vec diagV, offdiagV; 2324 PetscScalar *a, *diagA, *offdiagA; 2325 PetscInt r; 2326 PetscErrorCode ierr; 2327 2328 PetscFunctionBegin; 2329 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2330 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2331 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2332 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2333 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2334 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2335 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2336 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2337 for (r = 0; r < n; ++r) { 2338 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2339 a[r] = diagA[r]; 2340 idx[r] = cstart + diagIdx[r]; 2341 } else { 2342 a[r] = offdiagA[r]; 2343 idx[r] = cmap[offdiagIdx[r]]; 2344 } 2345 } 2346 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2347 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2348 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2349 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2350 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2351 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2352 PetscFunctionReturn(0); 2353 } 2354 2355 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2356 { 2357 PetscErrorCode ierr; 2358 Mat *dummy; 2359 2360 PetscFunctionBegin; 2361 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2362 *newmat = *dummy; 2363 ierr = PetscFree(dummy);CHKERRQ(ierr); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2368 { 2369 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2370 PetscErrorCode ierr; 2371 2372 PetscFunctionBegin; 2373 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2374 A->factorerrortype = a->A->factorerrortype; 2375 PetscFunctionReturn(0); 2376 } 2377 2378 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2379 { 2380 PetscErrorCode ierr; 2381 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2382 2383 PetscFunctionBegin; 2384 
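  /*
     Both the diagonal block (aij->A) and the off-diagonal block (aij->B) are
     filled with random values, and the parallel matrix is then re-assembled.

     Illustrative caller-level usage; not part of this file, assumes A is a
     preallocated MATMPIAIJ matrix:

        PetscRandom rctx;
        ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
        ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
        ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
        ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
  */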
ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2385 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2386 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2387 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2388 PetscFunctionReturn(0); 2389 } 2390 2391 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2392 { 2393 PetscFunctionBegin; 2394 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2395 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2396 PetscFunctionReturn(0); 2397 } 2398 2399 /*@ 2400 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2401 2402 Collective on Mat 2403 2404 Input Parameters: 2405 + A - the matrix 2406 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2407 2408 Level: advanced 2409 2410 @*/ 2411 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2421 { 2422 PetscErrorCode ierr; 2423 PetscBool sc = PETSC_FALSE,flg; 2424 2425 PetscFunctionBegin; 2426 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2427 ierr = PetscObjectOptionsBegin((PetscObject)A); 2428 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2429 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2430 if (flg) { 2431 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2432 } 2433 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2438 { 2439 PetscErrorCode ierr; 2440 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2441 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2442 2443 PetscFunctionBegin; 2444 if (!Y->preallocated) { 2445 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2446 } else if (!aij->nz) { 2447 PetscInt nonew = aij->nonew; 2448 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2449 aij->nonew = nonew; 2450 } 2451 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2456 { 2457 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2458 PetscErrorCode ierr; 2459 2460 PetscFunctionBegin; 2461 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2462 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2463 if (d) { 2464 PetscInt rstart; 2465 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2466 *d += rstart; 2467 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 2472 2473 /* -------------------------------------------------------------------*/ 2474 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2475 MatGetRow_MPIAIJ, 2476 MatRestoreRow_MPIAIJ, 2477 MatMult_MPIAIJ, 2478 /* 4*/ MatMultAdd_MPIAIJ, 2479 MatMultTranspose_MPIAIJ, 2480 MatMultTransposeAdd_MPIAIJ, 2481 0, 2482 0, 2483 0, 2484 /*10*/ 0, 2485 0, 2486 0, 2487 MatSOR_MPIAIJ, 2488 MatTranspose_MPIAIJ, 2489 /*15*/ MatGetInfo_MPIAIJ, 2490 MatEqual_MPIAIJ, 2491 
MatGetDiagonal_MPIAIJ, 2492 MatDiagonalScale_MPIAIJ, 2493 MatNorm_MPIAIJ, 2494 /*20*/ MatAssemblyBegin_MPIAIJ, 2495 MatAssemblyEnd_MPIAIJ, 2496 MatSetOption_MPIAIJ, 2497 MatZeroEntries_MPIAIJ, 2498 /*24*/ MatZeroRows_MPIAIJ, 2499 0, 2500 0, 2501 0, 2502 0, 2503 /*29*/ MatSetUp_MPIAIJ, 2504 0, 2505 0, 2506 MatGetDiagonalBlock_MPIAIJ, 2507 0, 2508 /*34*/ MatDuplicate_MPIAIJ, 2509 0, 2510 0, 2511 0, 2512 0, 2513 /*39*/ MatAXPY_MPIAIJ, 2514 MatCreateSubMatrices_MPIAIJ, 2515 MatIncreaseOverlap_MPIAIJ, 2516 MatGetValues_MPIAIJ, 2517 MatCopy_MPIAIJ, 2518 /*44*/ MatGetRowMax_MPIAIJ, 2519 MatScale_MPIAIJ, 2520 MatShift_MPIAIJ, 2521 MatDiagonalSet_MPIAIJ, 2522 MatZeroRowsColumns_MPIAIJ, 2523 /*49*/ MatSetRandom_MPIAIJ, 2524 0, 2525 0, 2526 0, 2527 0, 2528 /*54*/ MatFDColoringCreate_MPIXAIJ, 2529 0, 2530 MatSetUnfactored_MPIAIJ, 2531 MatPermute_MPIAIJ, 2532 0, 2533 /*59*/ MatCreateSubMatrix_MPIAIJ, 2534 MatDestroy_MPIAIJ, 2535 MatView_MPIAIJ, 2536 0, 2537 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2538 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2539 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2540 0, 2541 0, 2542 0, 2543 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2544 MatGetRowMinAbs_MPIAIJ, 2545 0, 2546 0, 2547 0, 2548 0, 2549 /*75*/ MatFDColoringApply_AIJ, 2550 MatSetFromOptions_MPIAIJ, 2551 0, 2552 0, 2553 MatFindZeroDiagonals_MPIAIJ, 2554 /*80*/ 0, 2555 0, 2556 0, 2557 /*83*/ MatLoad_MPIAIJ, 2558 MatIsSymmetric_MPIAIJ, 2559 0, 2560 0, 2561 0, 2562 0, 2563 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2564 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2565 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2566 MatPtAP_MPIAIJ_MPIAIJ, 2567 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2568 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2569 0, 2570 0, 2571 0, 2572 0, 2573 /*99*/ 0, 2574 0, 2575 0, 2576 MatConjugate_MPIAIJ, 2577 0, 2578 /*104*/MatSetValuesRow_MPIAIJ, 2579 MatRealPart_MPIAIJ, 2580 MatImaginaryPart_MPIAIJ, 2581 0, 2582 0, 2583 /*109*/0, 2584 0, 2585 MatGetRowMin_MPIAIJ, 2586 0, 2587 MatMissingDiagonal_MPIAIJ, 2588 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2589 0, 2590 MatGetGhosts_MPIAIJ, 2591 0, 2592 0, 2593 /*119*/0, 2594 0, 2595 0, 2596 0, 2597 MatGetMultiProcBlock_MPIAIJ, 2598 /*124*/MatFindNonzeroRows_MPIAIJ, 2599 MatGetColumnNorms_MPIAIJ, 2600 MatInvertBlockDiagonal_MPIAIJ, 2601 0, 2602 MatCreateSubMatricesMPI_MPIAIJ, 2603 /*129*/0, 2604 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2605 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2606 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2607 0, 2608 /*134*/0, 2609 0, 2610 MatRARt_MPIAIJ_MPIAIJ, 2611 0, 2612 0, 2613 /*139*/MatSetBlockSizes_MPIAIJ, 2614 0, 2615 0, 2616 MatFDColoringSetUp_MPIXAIJ, 2617 MatFindOffBlockDiagonalEntries_MPIAIJ, 2618 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2619 }; 2620 2621 /* ----------------------------------------------------------------------------------------*/ 2622 2623 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2624 { 2625 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2626 PetscErrorCode ierr; 2627 2628 PetscFunctionBegin; 2629 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2630 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2631 PetscFunctionReturn(0); 2632 } 2633 2634 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2635 { 2636 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2637 PetscErrorCode ierr; 2638 2639 PetscFunctionBegin; 2640 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2641 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2642 PetscFunctionReturn(0); 2643 } 2644 2645 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt 
o_nnz[]) 2646 { 2647 Mat_MPIAIJ *b; 2648 PetscErrorCode ierr; 2649 2650 PetscFunctionBegin; 2651 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2652 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2653 b = (Mat_MPIAIJ*)B->data; 2654 2655 #if defined(PETSC_USE_CTABLE) 2656 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2657 #else 2658 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2659 #endif 2660 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2661 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2662 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2663 2664 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2665 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2666 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2667 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2668 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2669 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2670 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2671 2672 if (!B->preallocated) { 2673 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2674 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2675 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2676 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2677 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2678 } 2679 2680 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2681 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2682 B->preallocated = PETSC_TRUE; 2683 B->was_assembled = PETSC_FALSE; 2684 B->assembled = PETSC_FALSE;; 2685 PetscFunctionReturn(0); 2686 } 2687 2688 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2689 { 2690 Mat mat; 2691 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2692 PetscErrorCode ierr; 2693 2694 PetscFunctionBegin; 2695 *newmat = 0; 2696 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2697 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2698 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2699 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2700 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2701 a = (Mat_MPIAIJ*)mat->data; 2702 2703 mat->factortype = matin->factortype; 2704 mat->assembled = PETSC_TRUE; 2705 mat->insertmode = NOT_SET_VALUES; 2706 mat->preallocated = PETSC_TRUE; 2707 2708 a->size = oldmat->size; 2709 a->rank = oldmat->rank; 2710 a->donotstash = oldmat->donotstash; 2711 a->roworiented = oldmat->roworiented; 2712 a->rowindices = 0; 2713 a->rowvalues = 0; 2714 a->getrowactive = PETSC_FALSE; 2715 2716 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2717 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2718 2719 if (oldmat->colmap) { 2720 #if defined(PETSC_USE_CTABLE) 2721 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2722 #else 2723 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2724 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2725 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2726 #endif 2727 } else a->colmap = 0; 2728 if (oldmat->garray) { 2729 PetscInt len; 2730 len = oldmat->B->cmap->n; 2731 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2732 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2733 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2734 } else a->garray = 0; 2735 2736 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2737 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2738 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2739 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2740 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2741 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2742 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2743 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2744 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2745 *newmat = mat; 2746 PetscFunctionReturn(0); 2747 } 2748 2749 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2750 { 2751 PetscScalar *vals,*svals; 2752 MPI_Comm comm; 2753 PetscErrorCode ierr; 2754 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2755 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2756 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2757 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2758 PetscInt cend,cstart,n,*rowners; 2759 int fd; 2760 PetscInt bs = newMat->rmap->bs; 2761 2762 PetscFunctionBegin; 2763 /* force binary viewer to load .info file if it has not yet done so */ 2764 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2765 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2766 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2767 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2768 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2769 if (!rank) { 2770 ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2771 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2772 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2773 } 2774 2775 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2776 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2777 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2778 if (bs < 0) bs = 1; 2779 2780 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2781 M = header[1]; N = header[2]; 2782 2783 /* If global sizes are set, check if they are consistent with that given in the file */ 2784 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2785 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2786 2787 /* determine ownership of all (block) rows */ 2788 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2789 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2790 else m = newMat->rmap->n; /* Set by user */ 2791 2792 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2793 
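  /*
     Ownership setup: every process contributes its local row count m, gathered
     into rowners[1..size] by the MPI_Allgather() below; the subsequent prefix
     sum turns the counts into ranges so that process "rank" owns rows
     [rowners[rank], rowners[rank+1]).  For example, with three processes and
     local counts {4,3,3}, rowners becomes {0,4,7,10}.
  */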
ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2794 2795 /* First process needs enough room for process with most rows */ 2796 if (!rank) { 2797 mmax = rowners[1]; 2798 for (i=2; i<=size; i++) { 2799 mmax = PetscMax(mmax, rowners[i]); 2800 } 2801 } else mmax = -1; /* unused, but compilers complain */ 2802 2803 rowners[0] = 0; 2804 for (i=2; i<=size; i++) { 2805 rowners[i] += rowners[i-1]; 2806 } 2807 rstart = rowners[rank]; 2808 rend = rowners[rank+1]; 2809 2810 /* distribute row lengths to all processors */ 2811 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2812 if (!rank) { 2813 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2814 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2815 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2816 for (j=0; j<m; j++) { 2817 procsnz[0] += ourlens[j]; 2818 } 2819 for (i=1; i<size; i++) { 2820 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2821 /* calculate the number of nonzeros on each processor */ 2822 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2823 procsnz[i] += rowlengths[j]; 2824 } 2825 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2826 } 2827 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2828 } else { 2829 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2830 } 2831 2832 if (!rank) { 2833 /* determine max buffer needed and allocate it */ 2834 maxnz = 0; 2835 for (i=0; i<size; i++) { 2836 maxnz = PetscMax(maxnz,procsnz[i]); 2837 } 2838 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2839 2840 /* read in my part of the matrix column indices */ 2841 nz = procsnz[0]; 2842 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2843 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2844 2845 /* read in every one elses and ship off */ 2846 for (i=1; i<size; i++) { 2847 nz = procsnz[i]; 2848 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2849 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2850 } 2851 ierr = PetscFree(cols);CHKERRQ(ierr); 2852 } else { 2853 /* determine buffer space needed for message */ 2854 nz = 0; 2855 for (i=0; i<m; i++) { 2856 nz += ourlens[i]; 2857 } 2858 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2859 2860 /* receive message of column indices*/ 2861 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2862 } 2863 2864 /* determine column ownership if matrix is not square */ 2865 if (N != M) { 2866 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2867 else n = newMat->cmap->n; 2868 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2869 cstart = cend - n; 2870 } else { 2871 cstart = rstart; 2872 cend = rend; 2873 n = cend - cstart; 2874 } 2875 2876 /* loop over local rows, determining number of off diagonal entries */ 2877 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2878 jj = 0; 2879 for (i=0; i<m; i++) { 2880 for (j=0; j<ourlens[i]; j++) { 2881 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2882 jj++; 2883 } 2884 } 2885 2886 for (i=0; i<m; i++) { 2887 ourlens[i] -= offlens[i]; 2888 } 2889 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2890 2891 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2892 2893 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2894 2895 for (i=0; i<m; i++) { 2896 ourlens[i] += offlens[i]; 2897 } 2898 2899 if (!rank) { 2900 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 2901 2902 /* read in 
my part of the matrix numerical values */ 2903 nz = procsnz[0]; 2904 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2905 2906 /* insert into matrix */ 2907 jj = rstart; 2908 smycols = mycols; 2909 svals = vals; 2910 for (i=0; i<m; i++) { 2911 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2912 smycols += ourlens[i]; 2913 svals += ourlens[i]; 2914 jj++; 2915 } 2916 2917 /* read in other processors and ship out */ 2918 for (i=1; i<size; i++) { 2919 nz = procsnz[i]; 2920 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 2921 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2922 } 2923 ierr = PetscFree(procsnz);CHKERRQ(ierr); 2924 } else { 2925 /* receive numeric values */ 2926 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 2927 2928 /* receive message of values*/ 2929 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 2930 2931 /* insert into matrix */ 2932 jj = rstart; 2933 smycols = mycols; 2934 svals = vals; 2935 for (i=0; i<m; i++) { 2936 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 2937 smycols += ourlens[i]; 2938 svals += ourlens[i]; 2939 jj++; 2940 } 2941 } 2942 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 2943 ierr = PetscFree(vals);CHKERRQ(ierr); 2944 ierr = PetscFree(mycols);CHKERRQ(ierr); 2945 ierr = PetscFree(rowners);CHKERRQ(ierr); 2946 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2947 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2948 PetscFunctionReturn(0); 2949 } 2950 2951 /* Not scalable because of ISAllGather() unless getting all columns. */ 2952 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 2953 { 2954 PetscErrorCode ierr; 2955 IS iscol_local; 2956 PetscBool isstride; 2957 PetscMPIInt lisstride=0,gisstride; 2958 2959 PetscFunctionBegin; 2960 /* check if we are grabbing all columns*/ 2961 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 2962 2963 if (isstride) { 2964 PetscInt start,len,mstart,mlen; 2965 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 2966 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 2967 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 2968 if (mstart == start && mlen-mstart == len) lisstride = 1; 2969 } 2970 2971 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2972 if (gisstride) { 2973 PetscInt N; 2974 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 2975 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 2976 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 2977 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 2978 } else { 2979 PetscInt cbs; 2980 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 2981 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 2982 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 2983 } 2984 2985 *isseq = iscol_local; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 /* 2990 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 2991 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 2992 2993 Input Parameters: 2994 mat - matrix 2995 isrow - parallel row index set; its local indices are a subset of local columns of mat, 2996 i.e., mat->rstart <= isrow[i] < mat->rend 2997 iscol - parallel 
column index set; its local indices are a subset of local columns of mat, 2998 i.e., mat->cstart <= iscol[i] < mat->cend 2999 Output Parameter: 3000 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3001 iscol_o - sequential column index set for retrieving mat->B 3002 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3003 */ 3004 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3005 { 3006 PetscErrorCode ierr; 3007 Vec x,cmap; 3008 const PetscInt *is_idx; 3009 PetscScalar *xarray,*cmaparray; 3010 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3011 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3012 Mat B=a->B; 3013 Vec lvec=a->lvec,lcmap; 3014 PetscInt i,cstart,cend,Bn=B->cmap->N; 3015 MPI_Comm comm; 3016 3017 PetscFunctionBegin; 3018 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3019 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3020 3021 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3022 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3023 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3024 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3025 3026 /* Get start indices */ 3027 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3028 isstart -= ncols; 3029 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3030 3031 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3032 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3033 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3034 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3035 for (i=0; i<ncols; i++) { 3036 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3037 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3038 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3039 } 3040 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3041 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3042 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3043 3044 /* Get iscol_d */ 3045 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3046 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3047 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3048 3049 /* Get isrow_d */ 3050 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3051 rstart = mat->rmap->rstart; 3052 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3053 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3054 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3055 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3056 3057 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3058 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3059 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3060 3061 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3062 ierr = VecScatterBegin(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3063 3064 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3065 3066 ierr = VecScatterEnd(a->Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3067 ierr = VecScatterBegin(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3068 ierr = VecScatterEnd(a->Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3069 3070 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3071 /* off-process column indices */ 3072 count = 0; 3073 ierr = 
PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3074 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3075 3076 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3077 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3078 for (i=0; i<Bn; i++) { 3079 if (PetscRealPart(xarray[i]) > -1.0) { 3080 idx[count] = i; /* local column index in off-diagonal part B */ 3081 cmap1[count++] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3082 } 3083 } 3084 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3085 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3086 3087 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3088 /* cannot ensure iscol_o has same blocksize as iscol! */ 3089 3090 ierr = PetscFree(idx);CHKERRQ(ierr); 3091 3092 *garray = cmap1; 3093 3094 ierr = VecDestroy(&x);CHKERRQ(ierr); 3095 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3096 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3097 PetscFunctionReturn(0); 3098 } 3099 3100 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3101 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3102 { 3103 PetscErrorCode ierr; 3104 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3105 Mat M = NULL; 3106 MPI_Comm comm; 3107 IS iscol_d,isrow_d,iscol_o; 3108 Mat Asub = NULL,Bsub = NULL; 3109 PetscInt n; 3110 3111 PetscFunctionBegin; 3112 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3113 3114 if (call == MAT_REUSE_MATRIX) { 3115 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3116 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3117 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3118 3119 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3120 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3121 3122 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3123 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3124 3125 /* Update diagonal and off-diagonal portions of submat */ 3126 asub = (Mat_MPIAIJ*)(*submat)->data; 3127 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3128 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3129 if (n) { 3130 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3131 } 3132 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3133 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3134 3135 } else { /* call == MAT_INITIAL_MATRIX) */ 3136 const PetscInt *garray; 3137 PetscInt BsubN; 3138 3139 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3140 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3141 3142 /* Create local submatrices Asub and Bsub */ 3143 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3144 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3145 3146 /* Create submatrix M */ 3147 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3148 3149 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3150 asub = (Mat_MPIAIJ*)M->data; 3151 3152 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3153 n = asub->B->cmap->N; 3154 if (BsubN > n) { 3155 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3156 const PetscInt *idx; 3157 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3158 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3159 3160 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3161 j = 0; 3162 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3163 for (i=0; i<n; i++) { 3164 if (j >= BsubN) break; 3165 while (subgarray[i] > garray[j]) j++; 3166 3167 if (subgarray[i] == garray[j]) { 3168 idx_new[i] = idx[j++]; 3169 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3170 } 3171 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3172 3173 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3174 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3175 3176 } else if (BsubN < n) { 3177 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3178 } 3179 3180 ierr = PetscFree(garray);CHKERRQ(ierr); 3181 *submat = M; 3182 3183 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3184 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3185 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3186 3187 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3188 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3189 3190 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3191 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3192 } 3193 PetscFunctionReturn(0); 3194 } 3195 3196 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3197 { 3198 PetscErrorCode ierr; 3199 IS iscol_local,isrow_d; 3200 PetscInt csize; 3201 PetscInt n,i,j,start,end; 3202 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3203 MPI_Comm comm; 3204 3205 PetscFunctionBegin; 3206 /* If isrow has same processor distribution as mat, 3207 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3208 if (call == MAT_REUSE_MATRIX) { 3209 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3210 if (isrow_d) { 3211 sameRowDist = PETSC_TRUE; 3212 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3213 } else { 3214 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3215 if (iscol_local) { 3216 sameRowDist = PETSC_TRUE; 3217 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3218 } 3219 } 3220 } else { 3221 /* Check if isrow has same processor distribution as mat */ 3222 sameDist[0] = 
PETSC_FALSE; 3223 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3224 if (!n) { 3225 sameDist[0] = PETSC_TRUE; 3226 } else { 3227 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3228 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3229 if (i >= start && j < end) { 3230 sameDist[0] = PETSC_TRUE; 3231 } 3232 } 3233 3234 /* Check if iscol has same processor distribution as mat */ 3235 sameDist[1] = PETSC_FALSE; 3236 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3237 if (!n) { 3238 sameDist[1] = PETSC_TRUE; 3239 } else { 3240 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3241 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3242 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3243 } 3244 3245 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3246 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3247 sameRowDist = tsameDist[0]; 3248 } 3249 3250 if (sameRowDist) { 3251 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3252 /* isrow and iscol have same processor distribution as mat */ 3253 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3254 } else { /* sameRowDist */ 3255 /* isrow has same processor distribution as mat */ 3256 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3257 } 3258 PetscFunctionReturn(0); 3259 } 3260 3261 /* General case: iscol -> iscol_local which has global size of iscol */ 3262 if (call == MAT_REUSE_MATRIX) { 3263 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3264 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3265 } else { 3266 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3267 } 3268 3269 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3270 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3271 3272 if (call == MAT_INITIAL_MATRIX) { 3273 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3274 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3275 } 3276 PetscFunctionReturn(0); 3277 } 3278 3279 /*@C 3280 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3281 and "off-diagonal" part of the matrix in CSR format. 3282 3283 Collective on MPI_Comm 3284 3285 Input Parameters: 3286 + comm - MPI communicator 3287 . A - "diagonal" portion of matrix 3288 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3289 - garray - global index of B columns 3290 3291 Output Parameter: 3292 . mat - the matrix, with input A as its local diagonal matrix 3293 Level: advanced 3294 3295 Notes: 3296 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3297 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
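   A minimal calling sketch (illustrative; C denotes the resulting parallel matrix, while A, B and garray are the inputs described above). After the call, A and B must not be used directly by the caller:
.vb
     ierr = MatCreateMPIAIJWithSeqAIJ(comm,A,B,garray,&C);CHKERRQ(ierr);
.ve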
3298 3299 .seealso: MatCreateMPIAIJWithSplitArrays() 3300 @*/ 3301 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3302 { 3303 PetscErrorCode ierr; 3304 Mat_MPIAIJ *maij; 3305 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3306 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3307 PetscScalar *oa=b->a; 3308 Mat Bnew; 3309 PetscInt m,n,N; 3310 3311 PetscFunctionBegin; 3312 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3313 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3314 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3315 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3316 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3317 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3318 3319 /* Get global columns of mat */ 3320 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3321 3322 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3323 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3324 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3325 maij = (Mat_MPIAIJ*)(*mat)->data; 3326 3327 (*mat)->preallocated = PETSC_TRUE; 3328 3329 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3330 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3331 3332 /* Set A as diagonal portion of *mat */ 3333 maij->A = A; 3334 3335 nz = oi[m]; 3336 for (i=0; i<nz; i++) { 3337 col = oj[i]; 3338 oj[i] = garray[col]; 3339 } 3340 3341 /* Set Bnew as off-diagonal portion of *mat */ 3342 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3343 bnew = (Mat_SeqAIJ*)Bnew->data; 3344 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3345 maij->B = Bnew; 3346 3347 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3348 3349 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3350 b->free_a = PETSC_FALSE; 3351 b->free_ij = PETSC_FALSE; 3352 ierr = MatDestroy(&B);CHKERRQ(ierr); 3353 3354 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3355 bnew->free_a = PETSC_TRUE; 3356 bnew->free_ij = PETSC_TRUE; 3357 3358 /* condense columns of maij->B */ 3359 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3360 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3361 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3362 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3363 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3364 PetscFunctionReturn(0); 3365 } 3366 3367 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3368 3369 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3370 { 3371 PetscErrorCode ierr; 3372 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3373 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3374 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3375 Mat M,Msub,B=a->B; 3376 MatScalar *aa; 3377 Mat_SeqAIJ *aij; 3378 PetscInt *garray = a->garray,*colsub,Ncols; 3379 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3380 IS 
iscol_sub,iscmap; 3381 const PetscInt *is_idx,*cmap; 3382 PetscBool allcolumns=PETSC_FALSE; 3383 IS iscol_local=NULL; 3384 MPI_Comm comm; 3385 3386 PetscFunctionBegin; 3387 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3388 3389 if (call == MAT_REUSE_MATRIX) { 3390 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3391 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3392 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3393 3394 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3395 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3396 3397 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3398 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3399 3400 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3401 3402 } else { /* call == MAT_INITIAL_MATRIX) */ 3403 PetscBool flg; 3404 3405 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3406 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3407 3408 /* (1) iscol -> nonscalable iscol_local */ 3409 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3410 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3411 if (n != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != Ncols %d",n,Ncols); 3412 3413 /* Check for special case: each processor gets entire matrix columns */ 3414 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3415 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3416 if (allcolumns) { 3417 iscol_sub = iscol_local; 3418 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3419 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3420 3421 } else { 3422 /* (2) iscol_local -> iscol_sub and iscmap */ 3423 PetscInt *idx,*cmap1,k; 3424 3425 /* implementation below requires iscol_local be sorted, it can have duplicate indices */ 3426 ierr = ISSorted(iscol_local,&flg);CHKERRQ(ierr); 3427 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unsorted iscol_local is not implemented yet"); 3428 3429 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3430 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3431 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3432 count = 0; 3433 k = 0; 3434 for (i=0; i<Ncols; i++) { 3435 j = is_idx[i]; 3436 if (j >= cstart && j < cend) { 3437 /* diagonal part of mat */ 3438 idx[count] = j; 3439 cmap1[count++] = i; /* column index in submat */ 3440 } else if (Bn) { 3441 /* off-diagonal part of mat */ 3442 if (j == garray[k]) { 3443 idx[count] = j; 3444 cmap1[count++] = i; /* column index in submat */ 3445 } else if (j > garray[k]) { 3446 while (j > garray[k] && k < Bn-1) k++; 3447 if (j == garray[k]) { 3448 idx[count] = j; 3449 cmap1[count++] = i; /* column index in submat */ 3450 } 3451 } 3452 } 3453 } 3454 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3455 3456 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3457 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3458 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3459 3460 ierr = 
ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3461 } 3462 3463 /* (3) Create sequential Msub */ 3464 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3465 } 3466 3467 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3468 aij = (Mat_SeqAIJ*)(Msub)->data; 3469 ii = aij->i; 3470 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3471 3472 /* 3473 m - number of local rows 3474 Ncols - number of columns (same on all processors) 3475 rstart - first row in new global matrix generated 3476 */ 3477 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3478 3479 if (call == MAT_INITIAL_MATRIX) { 3480 /* (4) Create parallel newmat */ 3481 PetscMPIInt rank,size; 3482 PetscInt csize; 3483 3484 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3485 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3486 3487 /* 3488 Determine the number of non-zeros in the diagonal and off-diagonal 3489 portions of the matrix in order to do correct preallocation 3490 */ 3491 3492 /* first get start and end of "diagonal" columns */ 3493 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3494 if (csize == PETSC_DECIDE) { 3495 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3496 if (mglobal == Ncols) { /* square matrix */ 3497 nlocal = m; 3498 } else { 3499 nlocal = Ncols/size + ((Ncols % size) > rank); 3500 } 3501 } else { 3502 nlocal = csize; 3503 } 3504 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3505 rstart = rend - nlocal; 3506 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3507 3508 /* next, compute all the lengths */ 3509 jj = aij->j; 3510 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3511 olens = dlens + m; 3512 for (i=0; i<m; i++) { 3513 jend = ii[i+1] - ii[i]; 3514 olen = 0; 3515 dlen = 0; 3516 for (j=0; j<jend; j++) { 3517 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3518 else dlen++; 3519 jj++; 3520 } 3521 olens[i] = olen; 3522 dlens[i] = dlen; 3523 } 3524 3525 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3526 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3527 3528 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3529 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 3530 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3531 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3532 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3533 ierr = PetscFree(dlens);CHKERRQ(ierr); 3534 3535 } else { /* call == MAT_REUSE_MATRIX */ 3536 M = *newmat; 3537 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3538 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3539 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3540 /* 3541 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3542 rather than the slower MatSetValues(). 
3543 */ 3544 M->was_assembled = PETSC_TRUE; 3545 M->assembled = PETSC_FALSE; 3546 } 3547 3548 /* (5) Set values of Msub to *newmat */ 3549 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3550 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3551 3552 jj = aij->j; 3553 aa = aij->a; 3554 for (i=0; i<m; i++) { 3555 row = rstart + i; 3556 nz = ii[i+1] - ii[i]; 3557 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3558 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3559 jj += nz; aa += nz; 3560 } 3561 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3562 3563 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3564 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3565 3566 ierr = PetscFree(colsub);CHKERRQ(ierr); 3567 3568 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3569 if (call == MAT_INITIAL_MATRIX) { 3570 *newmat = M; 3571 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3572 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3573 3574 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3575 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3576 3577 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3578 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3579 3580 if (iscol_local) { 3581 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3582 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3583 } 3584 } 3585 PetscFunctionReturn(0); 3586 } 3587 3588 /* 3589 Not great since it makes two copies of the submatrix, first an SeqAIJ 3590 in local and then by concatenating the local matrices the end result. 3591 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3592 3593 Note: This requires a sequential iscol with all indices. 
3594 */ 3595 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3596 { 3597 PetscErrorCode ierr; 3598 PetscMPIInt rank,size; 3599 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3600 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3601 Mat M,Mreuse; 3602 MatScalar *aa,*vwork; 3603 MPI_Comm comm; 3604 Mat_SeqAIJ *aij; 3605 PetscBool colflag,allcolumns=PETSC_FALSE; 3606 3607 PetscFunctionBegin; 3608 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3609 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3610 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3611 3612 /* Check for special case: each processor gets entire matrix columns */ 3613 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3614 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3615 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3616 3617 if (call == MAT_REUSE_MATRIX) { 3618 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3619 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3620 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3621 } else { 3622 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3623 } 3624 3625 /* 3626 m - number of local rows 3627 n - number of columns (same on all processors) 3628 rstart - first row in new global matrix generated 3629 */ 3630 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3631 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3632 if (call == MAT_INITIAL_MATRIX) { 3633 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3634 ii = aij->i; 3635 jj = aij->j; 3636 3637 /* 3638 Determine the number of non-zeros in the diagonal and off-diagonal 3639 portions of the matrix in order to do correct preallocation 3640 */ 3641 3642 /* first get start and end of "diagonal" columns */ 3643 if (csize == PETSC_DECIDE) { 3644 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3645 if (mglobal == n) { /* square matrix */ 3646 nlocal = m; 3647 } else { 3648 nlocal = n/size + ((n % size) > rank); 3649 } 3650 } else { 3651 nlocal = csize; 3652 } 3653 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3654 rstart = rend - nlocal; 3655 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3656 3657 /* next, compute all the lengths */ 3658 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3659 olens = dlens + m; 3660 for (i=0; i<m; i++) { 3661 jend = ii[i+1] - ii[i]; 3662 olen = 0; 3663 dlen = 0; 3664 for (j=0; j<jend; j++) { 3665 if (*jj < rstart || *jj >= rend) olen++; 3666 else dlen++; 3667 jj++; 3668 } 3669 olens[i] = olen; 3670 dlens[i] = dlen; 3671 } 3672 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3673 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3674 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3675 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3676 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3677 ierr = PetscFree(dlens);CHKERRQ(ierr); 3678 } else { 3679 PetscInt ml,nl; 3680 3681 M = *newmat; 3682 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3683 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3684 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3685 /* 3686 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3687 rather than the slower MatSetValues(). 3688 */ 3689 M->was_assembled = PETSC_TRUE; 3690 M->assembled = PETSC_FALSE; 3691 } 3692 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3693 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3694 ii = aij->i; 3695 jj = aij->j; 3696 aa = aij->a; 3697 for (i=0; i<m; i++) { 3698 row = rstart + i; 3699 nz = ii[i+1] - ii[i]; 3700 cwork = jj; jj += nz; 3701 vwork = aa; aa += nz; 3702 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3703 } 3704 3705 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3706 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3707 *newmat = M; 3708 3709 /* save submatrix used in processor for next request */ 3710 if (call == MAT_INITIAL_MATRIX) { 3711 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3712 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3713 } 3714 PetscFunctionReturn(0); 3715 } 3716 3717 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3718 { 3719 PetscInt m,cstart, cend,j,nnz,i,d; 3720 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3721 const PetscInt *JJ; 3722 PetscScalar *values; 3723 PetscErrorCode ierr; 3724 PetscBool nooffprocentries; 3725 3726 PetscFunctionBegin; 3727 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3728 3729 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3730 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3731 m = B->rmap->n; 3732 cstart = B->cmap->rstart; 3733 cend = B->cmap->rend; 3734 rstart = B->rmap->rstart; 3735 3736 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3737 3738 #if defined(PETSC_USE_DEBUG) 3739 for (i=0; i<m; i++) { 3740 nnz = Ii[i+1]- Ii[i]; 3741 JJ = J + Ii[i]; 3742 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3743 if (nnz && (JJ[0] < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i); 3744 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3745 } 3746 #endif 3747 3748 for (i=0; i<m; i++) { 3749 nnz = Ii[i+1]- Ii[i]; 3750 JJ = J + Ii[i]; 3751 nnz_max = PetscMax(nnz_max,nnz); 3752 d = 0; 3753 for (j=0; j<nnz; j++) { 3754 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3755 } 3756 d_nnz[i] = d; 3757 o_nnz[i] = nnz - d; 3758 } 3759 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3760 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3761 3762 if (v) values = (PetscScalar*)v; 3763 else { 3764 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3765 } 3766 3767 for (i=0; i<m; i++) { 3768 ii = i + rstart; 3769 nnz = Ii[i+1]- Ii[i]; 3770 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3771 } 3772 nooffprocentries = B->nooffprocentries; 3773 B->nooffprocentries = PETSC_TRUE; 3774 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3775 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3776 B->nooffprocentries = nooffprocentries; 3777 3778 if (!v) { 3779 ierr = PetscFree(values);CHKERRQ(ierr); 3780 } 3781 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3782 PetscFunctionReturn(0); 3783 } 3784 3785 /*@ 3786 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3787 (the default parallel PETSc format). 3788 3789 Collective on MPI_Comm 3790 3791 Input Parameters: 3792 + B - the matrix 3793 . i - the indices into j for the start of each local row (starts with zero) 3794 . j - the column indices for each local row (starts with zero) 3795 - v - optional values in the matrix 3796 3797 Level: developer 3798 3799 Notes: 3800 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3801 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3802 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3803 3804 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3805 3806 The format which is used for the sparse matrix input, is equivalent to a 3807 row-major ordering.. i.e for the following matrix, the input data expected is 3808 as shown 3809 3810 $ 1 0 0 3811 $ 2 0 3 P0 3812 $ ------- 3813 $ 4 5 6 P1 3814 $ 3815 $ Process0 [P0]: rows_owned=[0,1] 3816 $ i = {0,1,3} [size = nrow+1 = 2+1] 3817 $ j = {0,0,2} [size = 3] 3818 $ v = {1,2,3} [size = 3] 3819 $ 3820 $ Process1 [P1]: rows_owned=[2] 3821 $ i = {0,3} [size = nrow+1 = 1+1] 3822 $ j = {0,1,2} [size = 3] 3823 $ v = {4,5,6} [size = 3] 3824 3825 .keywords: matrix, aij, compressed row, sparse, parallel 3826 3827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3828 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3829 @*/ 3830 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3831 { 3832 PetscErrorCode ierr; 3833 3834 PetscFunctionBegin; 3835 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3836 PetscFunctionReturn(0); 3837 } 3838 3839 /*@C 3840 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3841 (the default parallel PETSc format). For good matrix assembly performance 3842 the user should preallocate the matrix storage by setting the parameters 3843 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3844 performance can be increased by more than a factor of 50. 3845 3846 Collective on MPI_Comm 3847 3848 Input Parameters: 3849 + B - the matrix 3850 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3851 (same value is used for all local rows) 3852 . d_nnz - array containing the number of nonzeros in the various rows of the 3853 DIAGONAL portion of the local submatrix (possibly different for each row) 3854 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3855 The size of this array is equal to the number of local rows, i.e 'm'. 
3856 For matrices that will be factored, you must leave room for (and set) 3857 the diagonal entry even if it is zero. 3858 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3859 submatrix (same value is used for all local rows). 3860 - o_nnz - array containing the number of nonzeros in the various rows of the 3861 OFF-DIAGONAL portion of the local submatrix (possibly different for 3862 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3863 structure. The size of this array is equal to the number 3864 of local rows, i.e 'm'. 3865 3866 If the *_nnz parameter is given then the *_nz parameter is ignored 3867 3868 The AIJ format (also called the Yale sparse matrix format or 3869 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3870 storage. The stored row and column indices begin with zero. 3871 See Users-Manual: ch_mat for details. 3872 3873 The parallel matrix is partitioned such that the first m0 rows belong to 3874 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3875 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3876 3877 The DIAGONAL portion of the local submatrix of a processor can be defined 3878 as the submatrix which is obtained by extraction the part corresponding to 3879 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3880 first row that belongs to the processor, r2 is the last row belonging to 3881 the this processor, and c1-c2 is range of indices of the local part of a 3882 vector suitable for applying the matrix to. This is an mxn matrix. In the 3883 common case of a square matrix, the row and column ranges are the same and 3884 the DIAGONAL part is also square. The remaining portion of the local 3885 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3886 3887 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3888 3889 You can call MatGetInfo() to get information on how effective the preallocation was; 3890 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3891 You can also run with the option -info and look for messages with the string 3892 malloc in them to see if additional memory allocation was needed. 3893 3894 Example usage: 3895 3896 Consider the following 8x8 matrix with 34 non-zero values, that is 3897 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3898 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3899 as follows: 3900 3901 .vb 3902 1 2 0 | 0 3 0 | 0 4 3903 Proc0 0 5 6 | 7 0 0 | 8 0 3904 9 0 10 | 11 0 0 | 12 0 3905 ------------------------------------- 3906 13 0 14 | 15 16 17 | 0 0 3907 Proc1 0 18 0 | 19 20 21 | 0 0 3908 0 0 0 | 22 23 0 | 24 0 3909 ------------------------------------- 3910 Proc2 25 26 27 | 0 0 28 | 29 0 3911 30 0 0 | 31 32 33 | 0 34 3912 .ve 3913 3914 This can be represented as a collection of submatrices as: 3915 3916 .vb 3917 A B C 3918 D E F 3919 G H I 3920 .ve 3921 3922 Where the submatrices A,B,C are owned by proc0, D,E,F are 3923 owned by proc1, G,H,I are owned by proc2. 3924 3925 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3926 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3927 The 'M','N' parameters are 8,8, and have the same values on all procs. 3928 3929 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3930 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3931 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3932 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3933 part as SeqAIJ matrices. e.g. proc1 will store [E] as a SeqAIJ 3934 matrix, and [DF] as another SeqAIJ matrix. 3935 3936 When d_nz, o_nz parameters are specified, d_nz storage elements are 3937 allocated for every row of the local diagonal submatrix, and o_nz 3938 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3939 One way to choose d_nz and o_nz is to use the max nonzeros per local 3940 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3941 In this case, the values of d_nz,o_nz are: 3942 .vb 3943 proc0 : dnz = 2, o_nz = 2 3944 proc1 : dnz = 3, o_nz = 2 3945 proc2 : dnz = 1, o_nz = 4 3946 .ve 3947 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3948 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3949 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3950 34 values. 3951 3952 When d_nnz, o_nnz parameters are specified, the storage is specified 3953 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3954 In the above case the values for d_nnz,o_nnz are: 3955 .vb 3956 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3957 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3958 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3959 .ve 3960 Here the space allocated is the sum of all the above values, i.e. 34, and 3961 hence the preallocation is perfect. 3962 3963 Level: intermediate 3964 3965 .keywords: matrix, aij, compressed row, sparse, parallel 3966 3967 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3968 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3969 @*/ 3970 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3971 { 3972 PetscErrorCode ierr; 3973 3974 PetscFunctionBegin; 3975 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3976 PetscValidType(B,1); 3977 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3978 PetscFunctionReturn(0); 3979 } 3980 3981 /*@ 3982 MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard 3983 CSR format. 3984 3985 Collective on MPI_Comm 3986 3987 Input Parameters: 3988 + comm - MPI communicator 3989 . m - number of local rows (Cannot be PETSC_DECIDE) 3990 . n - This value should be the same as the local size used in creating the 3991 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3992 calculated if N is given) For square matrices n is almost always m. 3993 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3994 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3995 . i - row indices 3996 . j - column indices 3997 - a - matrix values 3998 3999 Output Parameter: 4000 . mat - the matrix 4001 4002 Level: intermediate 4003 4004 Notes: 4005 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 4006 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4007 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4008 4009 The i and j indices are 0 based, and the i indices are offsets into the local j array.
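   For instance, in the two-process example shown below, process 0 could create its share of the 3x3 matrix with the call sketched here (i, j and v are the CSR arrays listed below; A is the resulting matrix):
.vb
     ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,&A);CHKERRQ(ierr);
.ve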
4010 4011 The format which is used for the sparse matrix input, is equivalent to a 4012 row-major ordering.. i.e for the following matrix, the input data expected is 4013 as shown 4014 4015 $ 1 0 0 4016 $ 2 0 3 P0 4017 $ ------- 4018 $ 4 5 6 P1 4019 $ 4020 $ Process0 [P0]: rows_owned=[0,1] 4021 $ i = {0,1,3} [size = nrow+1 = 2+1] 4022 $ j = {0,0,2} [size = 3] 4023 $ v = {1,2,3} [size = 3] 4024 $ 4025 $ Process1 [P1]: rows_owned=[2] 4026 $ i = {0,3} [size = nrow+1 = 1+1] 4027 $ j = {0,1,2} [size = 3] 4028 $ v = {4,5,6} [size = 3] 4029 4030 .keywords: matrix, aij, compressed row, sparse, parallel 4031 4032 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4033 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 4034 @*/ 4035 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4036 { 4037 PetscErrorCode ierr; 4038 4039 PetscFunctionBegin; 4040 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4041 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4042 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4043 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4044 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4045 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4046 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4047 PetscFunctionReturn(0); 4048 } 4049 4050 /*@C 4051 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4052 (the default parallel PETSc format). For good matrix assembly performance 4053 the user should preallocate the matrix storage by setting the parameters 4054 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4055 performance can be increased by more than a factor of 50. 4056 4057 Collective on MPI_Comm 4058 4059 Input Parameters: 4060 + comm - MPI communicator 4061 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4062 This value should be the same as the local size used in creating the 4063 y vector for the matrix-vector product y = Ax. 4064 . n - This value should be the same as the local size used in creating the 4065 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4066 calculated if N is given) For square matrices n is almost always m. 4067 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4068 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4069 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4070 (same value is used for all local rows) 4071 . d_nnz - array containing the number of nonzeros in the various rows of the 4072 DIAGONAL portion of the local submatrix (possibly different for each row) 4073 or NULL, if d_nz is used to specify the nonzero structure. 4074 The size of this array is equal to the number of local rows, i.e 'm'. 4075 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4076 submatrix (same value is used for all local rows). 4077 - o_nnz - array containing the number of nonzeros in the various rows of the 4078 OFF-DIAGONAL portion of the local submatrix (possibly different for 4079 each row) or NULL, if o_nz is used to specify the nonzero 4080 structure. The size of this array is equal to the number 4081 of local rows, i.e 'm'. 
4082 4083 Output Parameter: 4084 . A - the matrix 4085 4086 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4087 MatXXXXSetPreallocation() paradgm instead of this routine directly. 4088 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 4089 4090 Notes: 4091 If the *_nnz parameter is given then the *_nz parameter is ignored 4092 4093 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4094 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4095 storage requirements for this matrix. 4096 4097 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 4098 processor than it must be used on all processors that share the object for 4099 that argument. 4100 4101 The user MUST specify either the local or global matrix dimensions 4102 (possibly both). 4103 4104 The parallel matrix is partitioned across processors such that the 4105 first m0 rows belong to process 0, the next m1 rows belong to 4106 process 1, the next m2 rows belong to process 2 etc.. where 4107 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4108 values corresponding to [m x N] submatrix. 4109 4110 The columns are logically partitioned with the n0 columns belonging 4111 to 0th partition, the next n1 columns belonging to the next 4112 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4113 4114 The DIAGONAL portion of the local submatrix on any given processor 4115 is the submatrix corresponding to the rows and columns m,n 4116 corresponding to the given processor. i.e diagonal matrix on 4117 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4118 etc. The remaining portion of the local submatrix [m x (N-n)] 4119 constitute the OFF-DIAGONAL portion. The example below better 4120 illustrates this concept. 4121 4122 For a square global matrix we define each processor's diagonal portion 4123 to be its local rows and the corresponding columns (a square submatrix); 4124 each processor's off-diagonal portion encompasses the remainder of the 4125 local matrix (a rectangular submatrix). 4126 4127 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4128 4129 When calling this routine with a single process communicator, a matrix of 4130 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4131 type of communicator, use the construction mechanism 4132 .vb 4133 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4134 .ve 4135 4136 $ MatCreate(...,&A); 4137 $ MatSetType(A,MATMPIAIJ); 4138 $ MatSetSizes(A, m,n,M,N); 4139 $ MatMPIAIJSetPreallocation(A,...); 4140 4141 By default, this format uses inodes (identical nodes) when possible. 4142 We search for consecutive rows with the same nonzero structure, thereby 4143 reusing matrix information to achieve increased efficiency. 4144 4145 Options Database Keys: 4146 + -mat_no_inode - Do not use inodes 4147 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4148 - -mat_aij_oneindex - Internally use indexing starting at 1 4149 rather than 0. Note that when calling MatSetValues(), 4150 the user still MUST index entries starting at 0! 4151 4152 4153 Example usage: 4154 4155 Consider the following 8x8 matrix with 34 non-zero values, that is 4156 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4157 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 4158 as follows 4159 4160 .vb 4161 1 2 0 | 0 3 0 | 0 4 4162 Proc0 0 5 6 | 7 0 0 | 8 0 4163 9 0 10 | 11 0 0 | 12 0 4164 ------------------------------------- 4165 13 0 14 | 15 16 17 | 0 0 4166 Proc1 0 18 0 | 19 20 21 | 0 0 4167 0 0 0 | 22 23 0 | 24 0 4168 ------------------------------------- 4169 Proc2 25 26 27 | 0 0 28 | 29 0 4170 30 0 0 | 31 32 33 | 0 34 4171 .ve 4172 4173 This can be represented as a collection of submatrices as 4174 4175 .vb 4176 A B C 4177 D E F 4178 G H I 4179 .ve 4180 4181 Where the submatrices A,B,C are owned by proc0, D,E,F are 4182 owned by proc1, G,H,I are owned by proc2. 4183 4184 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4185 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4186 The 'M','N' parameters are 8,8, and have the same values on all procs. 4187 4188 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4189 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4190 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4191 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4192 part as SeqAIJ matrices. e.g. proc1 will store [E] as a SeqAIJ 4193 matrix, and [DF] as another SeqAIJ matrix. 4194 4195 When d_nz, o_nz parameters are specified, d_nz storage elements are 4196 allocated for every row of the local diagonal submatrix, and o_nz 4197 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4198 One way to choose d_nz and o_nz is to use the max nonzeros per local 4199 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4200 In this case, the values of d_nz,o_nz are 4201 .vb 4202 proc0 : dnz = 2, o_nz = 2 4203 proc1 : dnz = 3, o_nz = 2 4204 proc2 : dnz = 1, o_nz = 4 4205 .ve 4206 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 4207 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4208 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4209 34 values. 4210 4211 When d_nnz, o_nnz parameters are specified, the storage is specified 4212 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4213 In the above case the values for d_nnz,o_nnz are 4214 .vb 4215 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4216 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4217 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4218 .ve 4219 Here the space allocated is the sum of all the above values, i.e. 34, and 4220 hence the preallocation is perfect.
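   For instance, with the d_nnz and o_nnz values above, proc0 could create the matrix with the call sketched here (variable names are illustrative; A is the resulting matrix):
.vb
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve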
4221 4222 Level: intermediate 4223 4224 .keywords: matrix, aij, compressed row, sparse, parallel 4225 4226 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4227 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4228 @*/ 4229 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4230 { 4231 PetscErrorCode ierr; 4232 PetscMPIInt size; 4233 4234 PetscFunctionBegin; 4235 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4236 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4237 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4238 if (size > 1) { 4239 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4240 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4241 } else { 4242 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4243 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4244 } 4245 PetscFunctionReturn(0); 4246 } 4247 4248 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4249 { 4250 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4251 PetscBool flg; 4252 PetscErrorCode ierr; 4253 4254 PetscFunctionBegin; 4255 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 4256 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4257 if (Ad) *Ad = a->A; 4258 if (Ao) *Ao = a->B; 4259 if (colmap) *colmap = a->garray; 4260 PetscFunctionReturn(0); 4261 } 4262 4263 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4264 { 4265 PetscErrorCode ierr; 4266 PetscInt m,N,i,rstart,nnz,Ii; 4267 PetscInt *indx; 4268 PetscScalar *values; 4269 4270 PetscFunctionBegin; 4271 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4272 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4273 PetscInt *dnz,*onz,sum,bs,cbs; 4274 4275 if (n == PETSC_DECIDE) { 4276 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4277 } 4278 /* Check sum(n) = N */ 4279 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4280 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4281 4282 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4283 rstart -= m; 4284 4285 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4286 for (i=0; i<m; i++) { 4287 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4288 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4289 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4290 } 4291 4292 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4293 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4294 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4295 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4296 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4297 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4298 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4299 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4300 } 4301 4302 /* numeric phase */ 4303 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4304 for (i=0; i<m; i++) { 4305 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4306 Ii = i + rstart; 4307 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 
4308 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4309 } 4310 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4311 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4312 PetscFunctionReturn(0); 4313 } 4314 4315 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4316 { 4317 PetscErrorCode ierr; 4318 PetscMPIInt rank; 4319 PetscInt m,N,i,rstart,nnz; 4320 size_t len; 4321 const PetscInt *indx; 4322 PetscViewer out; 4323 char *name; 4324 Mat B; 4325 const PetscScalar *values; 4326 4327 PetscFunctionBegin; 4328 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4329 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4330 /* Should this be the type of the diagonal block of A? */ 4331 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4332 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4333 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4334 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4335 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4336 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4337 for (i=0; i<m; i++) { 4338 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4339 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4340 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4341 } 4342 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4343 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4344 4345 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4346 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4347 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4348 sprintf(name,"%s.%d",outfile,rank); 4349 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4350 ierr = PetscFree(name);CHKERRQ(ierr); 4351 ierr = MatView(B,out);CHKERRQ(ierr); 4352 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4353 ierr = MatDestroy(&B);CHKERRQ(ierr); 4354 PetscFunctionReturn(0); 4355 } 4356 4357 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4358 { 4359 PetscErrorCode ierr; 4360 Mat_Merge_SeqsToMPI *merge; 4361 PetscContainer container; 4362 4363 PetscFunctionBegin; 4364 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4365 if (container) { 4366 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4367 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4368 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4369 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4370 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4371 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4372 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4373 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4374 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4375 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4376 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4377 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4378 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4379 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4380 ierr = PetscFree(merge);CHKERRQ(ierr); 4381 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4382 } 4383 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4384 PetscFunctionReturn(0); 4385 } 4386 4387 #include <../src/mat/utils/freespace.h> 4388 #include <petscbt.h> 4389 4390 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4391 { 4392 PetscErrorCode ierr; 4393 MPI_Comm comm; 4394 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4395 PetscMPIInt 
size,rank,taga,*len_s; 4396 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4397 PetscInt proc,m; 4398 PetscInt **buf_ri,**buf_rj; 4399 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4400 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4401 MPI_Request *s_waits,*r_waits; 4402 MPI_Status *status; 4403 MatScalar *aa=a->a; 4404 MatScalar **abuf_r,*ba_i; 4405 Mat_Merge_SeqsToMPI *merge; 4406 PetscContainer container; 4407 4408 PetscFunctionBegin; 4409 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4410 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4411 4412 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4413 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4414 4415 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4416 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4417 4418 bi = merge->bi; 4419 bj = merge->bj; 4420 buf_ri = merge->buf_ri; 4421 buf_rj = merge->buf_rj; 4422 4423 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4424 owners = merge->rowmap->range; 4425 len_s = merge->len_s; 4426 4427 /* send and recv matrix values */ 4428 /*-----------------------------*/ 4429 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4430 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4431 4432 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4433 for (proc=0,k=0; proc<size; proc++) { 4434 if (!len_s[proc]) continue; 4435 i = owners[proc]; 4436 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4437 k++; 4438 } 4439 4440 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4441 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4442 ierr = PetscFree(status);CHKERRQ(ierr); 4443 4444 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4445 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4446 4447 /* insert mat values of mpimat */ 4448 /*----------------------------*/ 4449 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4450 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4451 4452 for (k=0; k<merge->nrecv; k++) { 4453 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4454 nrows = *(buf_ri_k[k]); 4455 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4456 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4457 } 4458 4459 /* set values of ba */ 4460 m = merge->rowmap->n; 4461 for (i=0; i<m; i++) { 4462 arow = owners[rank] + i; 4463 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4464 bnzi = bi[i+1] - bi[i]; 4465 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 4466 4467 /* add local non-zero vals of this proc's seqmat into ba */ 4468 anzi = ai[arow+1] - ai[arow]; 4469 aj = a->j + ai[arow]; 4470 aa = a->a + ai[arow]; 4471 nextaj = 0; 4472 for (j=0; nextaj<anzi; j++) { 4473 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4474 ba_i[j] += aa[nextaj++]; 4475 } 4476 } 4477 4478 /* add received vals into ba */ 4479 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4480 /* i-th row */ 4481 if (i == *nextrow[k]) { 4482 anzi = *(nextai[k]+1) - *nextai[k]; 4483 aj = buf_rj[k] + *(nextai[k]); 4484 aa = abuf_r[k] + *(nextai[k]); 4485 nextaj = 0; 4486 for (j=0; nextaj<anzi; j++) { 4487 if (*(bj_i + j) == aj[nextaj]) { /* bcol 
== acol */ 4488 ba_i[j] += aa[nextaj++]; 4489 } 4490 } 4491 nextrow[k]++; nextai[k]++; 4492 } 4493 } 4494 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4495 } 4496 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4497 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4498 4499 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4500 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4501 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4502 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4503 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4504 PetscFunctionReturn(0); 4505 } 4506 4507 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4508 { 4509 PetscErrorCode ierr; 4510 Mat B_mpi; 4511 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4512 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4513 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4514 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4515 PetscInt len,proc,*dnz,*onz,bs,cbs; 4516 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4517 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4518 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4519 MPI_Status *status; 4520 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4521 PetscBT lnkbt; 4522 Mat_Merge_SeqsToMPI *merge; 4523 PetscContainer container; 4524 4525 PetscFunctionBegin; 4526 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4527 4528 /* make sure it is a PETSc comm */ 4529 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4530 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4531 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4532 4533 ierr = PetscNew(&merge);CHKERRQ(ierr); 4534 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4535 4536 /* determine row ownership */ 4537 /*---------------------------------------------------------*/ 4538 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4539 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4540 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4541 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4542 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4543 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4544 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4545 4546 m = merge->rowmap->n; 4547 owners = merge->rowmap->range; 4548 4549 /* determine the number of messages to send, their lengths */ 4550 /*---------------------------------------------------------*/ 4551 len_s = merge->len_s; 4552 4553 len = 0; /* length of buf_si[] */ 4554 merge->nsend = 0; 4555 for (proc=0; proc<size; proc++) { 4556 len_si[proc] = 0; 4557 if (proc == rank) { 4558 len_s[proc] = 0; 4559 } else { 4560 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4561 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4562 } 4563 if (len_s[proc]) { 4564 merge->nsend++; 4565 nrows = 0; 4566 for (i=owners[proc]; i<owners[proc+1]; i++) { 4567 if (ai[i+1] > ai[i]) nrows++; 4568 } 4569 len_si[proc] = 2*(nrows+1); 4570 len += len_si[proc]; 4571 } 4572 } 4573 4574 /* determine the number and length of messages to receive for ij-structure */ 4575 /*-------------------------------------------------------------------------*/ 4576 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4577 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4578 4579 /* post the Irecv of j-structure */ 4580 /*-------------------------------*/ 4581 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4582 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4583 4584 /* post the Isend of j-structure */ 4585 /*--------------------------------*/ 4586 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4587 4588 for (proc=0, k=0; proc<size; proc++) { 4589 if (!len_s[proc]) continue; 4590 i = owners[proc]; 4591 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4592 k++; 4593 } 4594 4595 /* receives and sends of j-structure are complete */ 4596 /*------------------------------------------------*/ 4597 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4598 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4599 4600 /* send and recv i-structure */ 4601 /*---------------------------*/ 4602 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4603 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4604 4605 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4606 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4607 for (proc=0,k=0; proc<size; proc++) { 4608 if (!len_s[proc]) continue; 4609 /* form outgoing message for i-structure: 4610 buf_si[0]: nrows to be sent 4611 [1:nrows]: row index (global) 4612 [nrows+1:2*nrows+1]: i-structure index 4613 */ 4614 /*-------------------------------------------*/ 4615 nrows = len_si[proc]/2 - 1; 4616 buf_si_i = buf_si + nrows+1; 4617 buf_si[0] = nrows; 4618 buf_si_i[0] = 0; 4619 nrows = 0; 4620 for (i=owners[proc]; i<owners[proc+1]; i++) { 4621 anzi = ai[i+1] - ai[i]; 4622 if (anzi) { 4623 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4624 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4625 nrows++; 4626 } 4627 } 4628 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4629 k++; 4630 buf_si += len_si[proc]; 4631 } 4632 4633 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4634 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4635 4636 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4637 for (i=0; i<merge->nrecv; i++) { 4638 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4639 } 4640 4641 ierr = PetscFree(len_si);CHKERRQ(ierr); 4642 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4643 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4644 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4645 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4646 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4647 ierr = PetscFree(status);CHKERRQ(ierr); 4648 4649 /* compute a local seq matrix in each processor */ 4650 /*----------------------------------------------*/ 4651 /* allocate bi array and free space for accumulating nonzero column info */ 4652 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4653 bi[0] = 0; 4654 4655 /* create and initialize a linked list */ 4656 nlnk = N+1; 4657 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4658 4659 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4660 len = ai[owners[rank+1]] - 
ai[owners[rank]];
  ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);

  current_space = free_space;

  /* determine symbolic info for each local row */
  ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);

  for (k=0; k<merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th received i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th received i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th received i-structure */
  }

  ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
  len  = 0;
  for (i=0; i<m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow+1] - ai[arow];
    aj   = a->j + ai[arow];
    ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k=0; k<merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) { /* i-th row */
        anzi = *(nextai[k]+1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
        bnzi += nlnk;
        nextrow[k]++; nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining<bnzi) {
      ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
      nspacedouble++;
    }
    /* copy data into free space, then initialize lnk */
    ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
    ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);

    current_space->array           += bnzi;
    current_space->local_used      += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i+1] = bi[i] + bnzi;
  }

  ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);

  ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
  ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
  ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);

  /* create symbolic parallel matrix B_mpi */
  /*---------------------------------------*/
  ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
  if (n==PETSC_DECIDE) {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
  } else {
    ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  }
  ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
  ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
  ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
  ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled    = PETSC_FALSE;
  B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI;
  merge->bi           = bi;
  merge->bj           = bj;
  merge->buf_ri       = buf_ri;
  merge->buf_rj       = buf_rj;
  merge->coi          = NULL;
  merge->coj          = NULL;
  merge->owners_co    = NULL;

  ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);

  /* attach the
supporting struct to B_mpi for reuse */
  ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
  ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
  ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
  *mpimat = B_mpi;

  ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
   matrices from each processor

   Collective on MPI_Comm

   Input Parameters:
+  comm - the communicator the parallel matrix will live on
.  seqmat - the input sequential matrix on each process
.  m - number of local rows (or PETSC_DECIDE)
.  n - number of local columns (or PETSC_DECIDE)
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.  mpimat - the parallel matrix generated

   Level: advanced

   Notes:
   The dimensions of the sequential matrix in each processor MUST be the same.
   The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be
   destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    if (scall == MAT_INITIAL_MATRIX) {
      ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
    } else {
      ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    }
    ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }
  ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) {
    ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
  }
  ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
  ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
   mlocal rows and n columns, where mlocal is the local row count obtained with MatGetLocalSize() and n is the global column count obtained
   with MatGetSize().

   Not Collective

   Input Parameters:
+  A - the matrix
-  scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
.
A_loc - the local sequential matrix generated

   Level: developer

.seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed()

@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *mpimat=(Mat_MPIAIJ*)A->data;
  Mat_SeqAIJ     *mat,*a,*b;
  PetscInt       *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
  MatScalar      *aa,*ba,*cam;
  PetscScalar    *ca;
  PetscInt       am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
  PetscInt       *ci,*cj,col,ncols_d,ncols_o,jo;
  PetscBool      match;
  MPI_Comm       comm;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
  if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0);

  ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
  a = (Mat_SeqAIJ*)(mpimat->A)->data;
  b = (Mat_SeqAIJ*)(mpimat->B)->data;
  ai = a->i; aj = a->j; bi = b->i; bj = b->j;
  aa = a->a; ba = b->a;
  if (scall == MAT_INITIAL_MATRIX) {
    if (size == 1) {
      ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }

    ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
    ci[0] = 0;
    for (i=0; i<am; i++) {
      ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
    }
    ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
    ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
    k    = 0;
    for (i=0; i<am; i++) {
      ncols_o = bi[i+1] - bi[i];
      ncols_d = ai[i+1] - ai[i];
      /* off-diagonal portion of A */
      for (jo=0; jo<ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k]   = col; bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j=0; j<ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j=jo; j<ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4891 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4892 mat->free_a = PETSC_TRUE; 4893 mat->free_ij = PETSC_TRUE; 4894 mat->nonew = 0; 4895 } else if (scall == MAT_REUSE_MATRIX) { 4896 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4897 ci = mat->i; cj = mat->j; cam = mat->a; 4898 for (i=0; i<am; i++) { 4899 /* off-diagonal portion of A */ 4900 ncols_o = bi[i+1] - bi[i]; 4901 for (jo=0; jo<ncols_o; jo++) { 4902 col = cmap[*bj]; 4903 if (col >= cstart) break; 4904 *cam++ = *ba++; bj++; 4905 } 4906 /* diagonal portion of A */ 4907 ncols_d = ai[i+1] - ai[i]; 4908 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4909 /* off-diagonal portion of A */ 4910 for (j=jo; j<ncols_o; j++) { 4911 *cam++ = *ba++; bj++; 4912 } 4913 } 4914 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4915 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4916 PetscFunctionReturn(0); 4917 } 4918 4919 /*@C 4920 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4921 4922 Not Collective 4923 4924 Input Parameters: 4925 + A - the matrix 4926 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4927 - row, col - index sets of rows and columns to extract (or NULL) 4928 4929 Output Parameter: 4930 . A_loc - the local sequential matrix generated 4931 4932 Level: developer 4933 4934 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4935 4936 @*/ 4937 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4938 { 4939 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4940 PetscErrorCode ierr; 4941 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4942 IS isrowa,iscola; 4943 Mat *aloc; 4944 PetscBool match; 4945 4946 PetscFunctionBegin; 4947 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4948 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4949 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4950 if (!row) { 4951 start = A->rmap->rstart; end = A->rmap->rend; 4952 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4953 } else { 4954 isrowa = *row; 4955 } 4956 if (!col) { 4957 start = A->cmap->rstart; 4958 cmap = a->garray; 4959 nzA = a->A->cmap->n; 4960 nzB = a->B->cmap->n; 4961 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4962 ncols = 0; 4963 for (i=0; i<nzB; i++) { 4964 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4965 else break; 4966 } 4967 imark = i; 4968 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4969 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4970 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4971 } else { 4972 iscola = *col; 4973 } 4974 if (scall != MAT_INITIAL_MATRIX) { 4975 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4976 aloc[0] = *A_loc; 4977 } 4978 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4979 *A_loc = aloc[0]; 4980 ierr = PetscFree(aloc);CHKERRQ(ierr); 4981 if (!row) { 4982 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4983 } 4984 if (!col) { 4985 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4986 } 4987 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4988 PetscFunctionReturn(0); 4989 } 4990 4991 /*@C 4992 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 4993 4994 Collective on Mat 4995 4996 Input Parameters: 4997 + A,B - the matrices in mpiaij format 4998 . 
scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4999 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5000 5001 Output Parameter: 5002 + rowb, colb - index sets of rows and columns of B to extract 5003 - B_seq - the sequential matrix generated 5004 5005 Level: developer 5006 5007 @*/ 5008 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5009 { 5010 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5011 PetscErrorCode ierr; 5012 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5013 IS isrowb,iscolb; 5014 Mat *bseq=NULL; 5015 5016 PetscFunctionBegin; 5017 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5018 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5019 } 5020 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5021 5022 if (scall == MAT_INITIAL_MATRIX) { 5023 start = A->cmap->rstart; 5024 cmap = a->garray; 5025 nzA = a->A->cmap->n; 5026 nzB = a->B->cmap->n; 5027 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5028 ncols = 0; 5029 for (i=0; i<nzB; i++) { /* row < local row index */ 5030 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5031 else break; 5032 } 5033 imark = i; 5034 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5035 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5036 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5037 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5038 } else { 5039 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5040 isrowb = *rowb; iscolb = *colb; 5041 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5042 bseq[0] = *B_seq; 5043 } 5044 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5045 *B_seq = bseq[0]; 5046 ierr = PetscFree(bseq);CHKERRQ(ierr); 5047 if (!rowb) { 5048 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5049 } else { 5050 *rowb = isrowb; 5051 } 5052 if (!colb) { 5053 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5054 } else { 5055 *colb = iscolb; 5056 } 5057 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 /* 5062 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5063 of the OFF-DIAGONAL portion of local A 5064 5065 Collective on Mat 5066 5067 Input Parameters: 5068 + A,B - the matrices in mpiaij format 5069 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5070 5071 Output Parameter: 5072 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5073 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5074 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5075 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5076 5077 Level: developer 5078 5079 */ 5080 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5081 { 5082 VecScatter_MPI_General *gen_to,*gen_from; 5083 PetscErrorCode ierr; 5084 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5085 Mat_SeqAIJ *b_oth; 5086 VecScatter ctx =a->Mvctx; 5087 MPI_Comm comm; 5088 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 5089 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 5090 PetscInt *rvalues,*svalues; 5091 MatScalar *b_otha,*bufa,*bufA; 5092 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 5093 MPI_Request *rwaits = NULL,*swaits = NULL; 5094 MPI_Status *sstatus,rstatus; 5095 PetscMPIInt jj,size; 5096 PetscInt *cols,sbs,rbs; 5097 PetscScalar *vals; 5098 5099 PetscFunctionBegin; 5100 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5101 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5102 5103 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5104 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5105 } 5106 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5107 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5108 5109 if (size == 1) { 5110 startsj_s = NULL; 5111 bufa_ptr = NULL; 5112 *B_oth = NULL; 5113 PetscFunctionReturn(0); 5114 } 5115 5116 gen_to = (VecScatter_MPI_General*)ctx->todata; 5117 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 5118 nrecvs = gen_from->n; 5119 nsends = gen_to->n; 5120 5121 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5122 srow = gen_to->indices; /* local row index to be sent */ 5123 sstarts = gen_to->starts; 5124 sprocs = gen_to->procs; 5125 sstatus = gen_to->sstatus; 5126 sbs = gen_to->bs; 5127 rstarts = gen_from->starts; 5128 rprocs = gen_from->procs; 5129 rbs = gen_from->bs; 5130 5131 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5132 if (scall == MAT_INITIAL_MATRIX) { 5133 /* i-array */ 5134 /*---------*/ 5135 /* post receives */ 5136 ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr); 5137 for (i=0; i<nrecvs; i++) { 5138 rowlen = rvalues + rstarts[i]*rbs; 5139 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5140 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5141 } 5142 5143 /* pack the outgoing message */ 5144 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5145 5146 sstartsj[0] = 0; 5147 rstartsj[0] = 0; 5148 len = 0; /* total length of j or a array to be sent */ 5149 k = 0; 5150 ierr = PetscMalloc1(sbs*(sstarts[nsends] - sstarts[0]),&svalues);CHKERRQ(ierr); 5151 for (i=0; i<nsends; i++) { 5152 rowlen = svalues + sstarts[i]*sbs; 5153 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5154 for (j=0; j<nrows; j++) { 5155 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5156 for (l=0; l<sbs; l++) { 5157 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5158 5159 rowlen[j*sbs+l] = ncols; 5160 5161 len += ncols; 5162 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5163 } 5164 k++; 5165 } 5166 ierr = 
MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);

      sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
    ierr = PetscFree(svalues);CHKERRQ(ierr);

    /* allocate buffers for sending j and a arrays */
    ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
    ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);

    /* create i-array of B_oth */
    ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i=0; i<nrecvs; i++) {
      rowlen = rvalues + rstarts[i]*rbs;
      nrows  = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */
      for (j=0; j<nrows; j++) {
        b_othi[k+1] = b_othi[k] + rowlen[j];
        ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
        k++;
      }
      rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    ierr = PetscFree(rvalues);CHKERRQ(ierr);

    /* allocate space for j and a arrays of B_oth */
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
    ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);

    /* j-array */
    /*---------*/
    /* post receives of j-array */
    for (i=0; i<nrecvs; i++) {
      nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
      ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
    }

    /* pack the outgoing message j-array */
    k = 0;
    for (i=0; i<nsends; i++) {
      nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
      bufJ  = bufj+sstartsj[i];
      for (j=0; j<nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll=0; ll<sbs; ll++) {
          ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
          for (l=0; l<ncols; l++) {
            *bufJ++ = cols[l];
          }
          ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
        }
      }
      ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr);
    }

    /* recvs and sends of j-array are completed */
    i = nrecvs;
    while (i--) {
      ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr);
    }
    if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);}
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
    b_otha   = b_oth->a;
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");

  /* a-array */
  /*---------*/
  /* post receives of a-array */
  for (i=0; i<nrecvs; i++) {
    nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
    ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
  }

  /* pack the outgoing message a-array */
  k = 0;
  for (i=0; i<nsends; i++) {
    nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
    bufA  = bufa+sstartsj[i];
    for (j=0; j<nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll=0;
ll<sbs; ll++) { 5260 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5261 for (l=0; l<ncols; l++) { 5262 *bufA++ = vals[l]; 5263 } 5264 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5265 } 5266 } 5267 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5268 } 5269 /* recvs and sends of a-array are completed */ 5270 i = nrecvs; 5271 while (i--) { 5272 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5273 } 5274 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 5275 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5276 5277 if (scall == MAT_INITIAL_MATRIX) { 5278 /* put together the new matrix */ 5279 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5280 5281 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5282 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5283 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5284 b_oth->free_a = PETSC_TRUE; 5285 b_oth->free_ij = PETSC_TRUE; 5286 b_oth->nonew = 0; 5287 5288 ierr = PetscFree(bufj);CHKERRQ(ierr); 5289 if (!startsj_s || !bufa_ptr) { 5290 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5291 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5292 } else { 5293 *startsj_s = sstartsj; 5294 *startsj_r = rstartsj; 5295 *bufa_ptr = bufa; 5296 } 5297 } 5298 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5299 PetscFunctionReturn(0); 5300 } 5301 5302 /*@C 5303 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5304 5305 Not Collective 5306 5307 Input Parameters: 5308 . A - The matrix in mpiaij format 5309 5310 Output Parameter: 5311 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5312 . 
colmap - A map from global column index to local index into lvec 5313 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5314 5315 Level: developer 5316 5317 @*/ 5318 #if defined(PETSC_USE_CTABLE) 5319 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5320 #else 5321 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5322 #endif 5323 { 5324 Mat_MPIAIJ *a; 5325 5326 PetscFunctionBegin; 5327 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5328 PetscValidPointer(lvec, 2); 5329 PetscValidPointer(colmap, 3); 5330 PetscValidPointer(multScatter, 4); 5331 a = (Mat_MPIAIJ*) A->data; 5332 if (lvec) *lvec = a->lvec; 5333 if (colmap) *colmap = a->colmap; 5334 if (multScatter) *multScatter = a->Mvctx; 5335 PetscFunctionReturn(0); 5336 } 5337 5338 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5339 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5340 #if defined(PETSC_HAVE_MKL_SPARSE) 5341 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5342 #endif 5343 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5344 #if defined(PETSC_HAVE_ELEMENTAL) 5345 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5346 #endif 5347 #if defined(PETSC_HAVE_HYPRE) 5348 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5349 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5350 #endif 5351 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_IS(Mat,MatType,MatReuse,Mat*); 5352 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIELL(Mat,MatType,MatReuse,Mat*); 5353 5354 /* 5355 Computes (B'*A')' since computing B*A directly is untenable 5356 5357 n p p 5358 ( ) ( ) ( ) 5359 m ( A ) * n ( B ) = m ( C ) 5360 ( ) ( ) ( ) 5361 5362 */ 5363 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5364 { 5365 PetscErrorCode ierr; 5366 Mat At,Bt,Ct; 5367 5368 PetscFunctionBegin; 5369 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5370 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5371 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5372 ierr = MatDestroy(&At);CHKERRQ(ierr); 5373 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5374 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5375 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5376 PetscFunctionReturn(0); 5377 } 5378 5379 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5380 { 5381 PetscErrorCode ierr; 5382 PetscInt m=A->rmap->n,n=B->cmap->n; 5383 Mat Cmat; 5384 5385 PetscFunctionBegin; 5386 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5387 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5388 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5389 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5390 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5391 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5392 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5393 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5394 5395 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5396 5397 *C = Cmat; 5398 PetscFunctionReturn(0); 5399 } 5400 5401 /* 
----------------------------------------------------------------*/ 5402 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5403 { 5404 PetscErrorCode ierr; 5405 5406 PetscFunctionBegin; 5407 if (scall == MAT_INITIAL_MATRIX) { 5408 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5409 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5410 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5411 } 5412 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5413 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5414 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5415 PetscFunctionReturn(0); 5416 } 5417 5418 /*MC 5419 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5420 5421 Options Database Keys: 5422 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5423 5424 Level: beginner 5425 5426 .seealso: MatCreateAIJ() 5427 M*/ 5428 5429 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5430 { 5431 Mat_MPIAIJ *b; 5432 PetscErrorCode ierr; 5433 PetscMPIInt size; 5434 5435 PetscFunctionBegin; 5436 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5437 5438 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5439 B->data = (void*)b; 5440 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5441 B->assembled = PETSC_FALSE; 5442 B->insertmode = NOT_SET_VALUES; 5443 b->size = size; 5444 5445 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5446 5447 /* build cache for off array entries formed */ 5448 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5449 5450 b->donotstash = PETSC_FALSE; 5451 b->colmap = 0; 5452 b->garray = 0; 5453 b->roworiented = PETSC_TRUE; 5454 5455 /* stuff used for matrix vector multiply */ 5456 b->lvec = NULL; 5457 b->Mvctx = NULL; 5458 5459 /* stuff for MatGetRow() */ 5460 b->rowindices = 0; 5461 b->rowvalues = 0; 5462 b->getrowactive = PETSC_FALSE; 5463 5464 /* flexible pointer used in CUSP/CUSPARSE classes */ 5465 b->spptr = NULL; 5466 5467 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5468 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5469 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5470 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5471 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5472 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5473 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5474 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5475 #if defined(PETSC_HAVE_MKL_SPARSE) 5476 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr); 5477 #endif 5478 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5479 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5480 #if defined(PETSC_HAVE_ELEMENTAL) 5481 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5482 #endif 5483 #if defined(PETSC_HAVE_HYPRE) 5484 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5485 #endif 5486 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_MPIAIJ_IS);CHKERRQ(ierr); 5487 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiell_C",MatConvert_MPIAIJ_MPIELL);CHKERRQ(ierr); 5488 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5489 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5490 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5491 #if defined(PETSC_HAVE_HYPRE) 5492 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr); 5493 #endif 5494 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5495 PetscFunctionReturn(0); 5496 } 5497 5498 /*@C 5499 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5500 and "off-diagonal" part of the matrix in CSR format. 5501 5502 Collective on MPI_Comm 5503 5504 Input Parameters: 5505 + comm - MPI communicator 5506 . m - number of local rows (Cannot be PETSC_DECIDE) 5507 . n - This value should be the same as the local size used in creating the 5508 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5509 calculated if N is given) For square matrices n is almost always m. 5510 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5511 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5512 . i - row indices for "diagonal" portion of matrix 5513 . j - column indices 5514 . a - matrix values 5515 . oi - row indices for "off-diagonal" portion of matrix 5516 . oj - column indices 5517 - oa - matrix values 5518 5519 Output Parameter: 5520 . mat - the matrix 5521 5522 Level: advanced 5523 5524 Notes: 5525 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5526 must free the arrays once the matrix has been destroyed and not before. 5527 5528 The i and j indices are 0 based 5529 5530 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5531 5532 This sets local rows and cannot be used to set off-processor values. 5533 5534 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5535 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5536 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5537 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5538 keep track of the underlying array. 
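   For illustration only, a minimal sketch of the expected layout (hypothetical values; it assumes two MPI ranks, each
   owning one row and one column of a 2 by 2 matrix with 2 on the diagonal and -1 off it, and that rank, ierr and mat
   are declared elsewhere):
.vb
      PetscInt    i[2]  = {0,1}, j[1]  = {0};        /* diagonal block: one entry, local column 0 */
      PetscInt    oi[2] = {0,1}, oj[1] = {1 - rank}; /* off-diagonal block: one entry, global column index */
      PetscScalar a[1]  = {2.0}, oa[1] = {-1.0};
      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
.ve
   In this sketch the off-diagonal column indices oj are global, matching the MatCreateSeqAIJWithArrays() call used for
   the off-diagonal block in the code below; as noted above, none of the arrays are copied, so they must remain valid
   until mat is destroyed.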
Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5539 communication if it is known that only local entries will be set. 5540 5541 .keywords: matrix, aij, compressed row, sparse, parallel 5542 5543 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5544 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5545 @*/ 5546 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5547 { 5548 PetscErrorCode ierr; 5549 Mat_MPIAIJ *maij; 5550 5551 PetscFunctionBegin; 5552 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5553 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5554 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5555 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5556 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5557 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5558 maij = (Mat_MPIAIJ*) (*mat)->data; 5559 5560 (*mat)->preallocated = PETSC_TRUE; 5561 5562 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5563 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5564 5565 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5566 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5567 5568 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5569 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5570 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5571 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5572 5573 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 5574 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5575 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5576 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 5577 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5578 PetscFunctionReturn(0); 5579 } 5580 5581 /* 5582 Special version for direct calls from Fortran 5583 */ 5584 #include <petsc/private/fortranimpl.h> 5585 5586 /* Change these macros so can be used in void function */ 5587 #undef CHKERRQ 5588 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5589 #undef SETERRQ2 5590 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5591 #undef SETERRQ3 5592 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5593 #undef SETERRQ 5594 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5595 5596 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5597 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5598 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5599 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5600 #else 5601 #endif 5602 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5603 { 5604 Mat mat = *mmat; 5605 PetscInt m = *mm, n = *mn; 5606 InsertMode addv = *maddv; 5607 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5608 PetscScalar value; 5609 PetscErrorCode ierr; 5610 5611 MatCheckPreallocated(mat,1); 5612 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5613 5614 #if 
defined(PETSC_USE_DEBUG) 5615 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5616 #endif 5617 { 5618 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5619 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5620 PetscBool roworiented = aij->roworiented; 5621 5622 /* Some Variables required in the macro */ 5623 Mat A = aij->A; 5624 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5625 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5626 MatScalar *aa = a->a; 5627 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5628 Mat B = aij->B; 5629 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5630 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5631 MatScalar *ba = b->a; 5632 5633 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5634 PetscInt nonew = a->nonew; 5635 MatScalar *ap1,*ap2; 5636 5637 PetscFunctionBegin; 5638 for (i=0; i<m; i++) { 5639 if (im[i] < 0) continue; 5640 #if defined(PETSC_USE_DEBUG) 5641 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5642 #endif 5643 if (im[i] >= rstart && im[i] < rend) { 5644 row = im[i] - rstart; 5645 lastcol1 = -1; 5646 rp1 = aj + ai[row]; 5647 ap1 = aa + ai[row]; 5648 rmax1 = aimax[row]; 5649 nrow1 = ailen[row]; 5650 low1 = 0; 5651 high1 = nrow1; 5652 lastcol2 = -1; 5653 rp2 = bj + bi[row]; 5654 ap2 = ba + bi[row]; 5655 rmax2 = bimax[row]; 5656 nrow2 = bilen[row]; 5657 low2 = 0; 5658 high2 = nrow2; 5659 5660 for (j=0; j<n; j++) { 5661 if (roworiented) value = v[i*n+j]; 5662 else value = v[i+j*m]; 5663 if (in[j] >= cstart && in[j] < cend) { 5664 col = in[j] - cstart; 5665 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5666 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5667 } else if (in[j] < 0) continue; 5668 #if defined(PETSC_USE_DEBUG) 5669 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5670 #endif 5671 else { 5672 if (mat->was_assembled) { 5673 if (!aij->colmap) { 5674 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5675 } 5676 #if defined(PETSC_USE_CTABLE) 5677 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5678 col--; 5679 #else 5680 col = aij->colmap[in[j]] - 1; 5681 #endif 5682 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 5683 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5684 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5685 col = in[j]; 5686 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5687 B = aij->B; 5688 b = (Mat_SeqAIJ*)B->data; 5689 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5690 rp2 = bj + bi[row]; 5691 ap2 = ba + bi[row]; 5692 rmax2 = bimax[row]; 5693 nrow2 = bilen[row]; 5694 low2 = 0; 5695 high2 = nrow2; 5696 bm = aij->B->rmap->n; 5697 ba = b->a; 5698 } 5699 } else col = in[j]; 5700 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5701 } 5702 } 5703 } else if (!aij->donotstash) { 5704 if (roworiented) { 5705 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5706 } else { 5707 ierr = 
MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5708 } 5709 } 5710 } 5711 } 5712 PetscFunctionReturnVoid(); 5713 } 5714 5715
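/*
   Note: matsetvaluesmpiaij_ above duplicates the MPIAIJ MatSetValues() insertion logic (including stashing of rows
   owned by other processes) so that Fortran code can call it directly, bypassing the usual function-table dispatch;
   any change to the insertion logic earlier in this file should be mirrored here.
*/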