#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

   Developer Notes: Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, and MATAIJCRL; the type also
   automatically switches over to use inodes when enough of them exist.

   Level: beginner

.seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

   Level: beginner

.seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
M*/

#undef __FUNCT__
#define __FUNCT__ "MatFindNonzeroRows_MPIAIJ"
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
{
  PetscErrorCode  ierr;
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
  const PetscInt  *ia,*ib;
  const MatScalar *aa,*bb;
  PetscInt        na,nb,i,j,*rows,cnt = 0,n0rows;
  PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = 0;
  ia        = a->i;
  ib        = b->i;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
ok1:;
  }
  ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
  if (!n0rows) PetscFunctionReturn(0);
  ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<m; i++) {
    na = ia[i+1] - ia[i];
    nb = ib[i+1] - ib[i];
    if (!na && !nb) continue;
    aa = a->a + ia[i];
    for (j=0; j<na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = b->a + ib[i];
    for (j=0; j<nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
ok2:;
  }
  ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalSet_MPIAIJ"
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
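/*
   A minimal usage sketch of the dual-preallocation pattern recommended in the MATAIJ manual page
   above (user-level code, not part of this file; the size n and the per-row counts 5 and 2 are
   placeholder values chosen only for illustration):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);          used on a one-process communicator
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);   used on a multi-process communicator

   Whichever preallocation routine does not match the matrix's actual type is a no-op, so calling
   both keeps the calling code independent of the number of processes.
*/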
111 { 112 PetscErrorCode ierr; 113 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 114 115 PetscFunctionBegin; 116 if (Y->assembled && Y->rmap->rstart == Y->cmap->rstart && Y->rmap->rend == Y->cmap->rend) { 117 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 118 } else { 119 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 120 } 121 PetscFunctionReturn(0); 122 } 123 124 125 #undef __FUNCT__ 126 #define __FUNCT__ "MatFindZeroDiagonals_MPIAIJ" 127 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 128 { 129 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 130 PetscErrorCode ierr; 131 PetscInt i,rstart,nrows,*rows; 132 133 PetscFunctionBegin; 134 *zrows = NULL; 135 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 136 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 137 for (i=0; i<nrows; i++) rows[i] += rstart; 138 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 139 PetscFunctionReturn(0); 140 } 141 142 #undef __FUNCT__ 143 #define __FUNCT__ "MatGetColumnNorms_MPIAIJ" 144 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 145 { 146 PetscErrorCode ierr; 147 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 148 PetscInt i,n,*garray = aij->garray; 149 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 150 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 151 PetscReal *work; 152 153 PetscFunctionBegin; 154 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 155 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 156 if (type == NORM_2) { 157 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 158 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 159 } 160 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 161 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 162 } 163 } else if (type == NORM_1) { 164 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 165 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 166 } 167 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 168 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 169 } 170 } else if (type == NORM_INFINITY) { 171 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 172 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 173 } 174 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 175 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 176 } 177 178 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 179 if (type == NORM_INFINITY) { 180 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 181 } else { 182 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 183 } 184 ierr = PetscFree(work);CHKERRQ(ierr); 185 if (type == NORM_2) { 186 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 187 } 188 PetscFunctionReturn(0); 189 } 190 191 #undef __FUNCT__ 192 #define __FUNCT__ "MatFindOffBlockDiagonalEntries_MPIAIJ" 193 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 194 { 195 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 196 IS sis,gis; 197 PetscErrorCode ierr; 198 const PetscInt *isis,*igis; 199 PetscInt n,*iis,nsis,ngis,rstart,i; 200 201 PetscFunctionBegin; 202 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 203 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 204 ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 205 ierr = 
ISGetSize(sis,&nsis);CHKERRQ(ierr); 206 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 207 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 208 209 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 210 ierr = PetscMemcpy(iis,igis,ngis*sizeof(PetscInt));CHKERRQ(ierr); 211 ierr = PetscMemcpy(iis+ngis,isis,nsis*sizeof(PetscInt));CHKERRQ(ierr); 212 n = ngis + nsis; 213 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 214 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 215 for (i=0; i<n; i++) iis[i] += rstart; 216 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 217 218 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 219 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 220 ierr = ISDestroy(&sis);CHKERRQ(ierr); 221 ierr = ISDestroy(&gis);CHKERRQ(ierr); 222 PetscFunctionReturn(0); 223 } 224 225 #undef __FUNCT__ 226 #define __FUNCT__ "MatDistribute_MPIAIJ" 227 /* 228 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 229 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 230 231 Only for square matrices 232 233 Used by a preconditioner, hence PETSC_EXTERN 234 */ 235 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 236 { 237 PetscMPIInt rank,size; 238 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 239 PetscErrorCode ierr; 240 Mat mat; 241 Mat_SeqAIJ *gmata; 242 PetscMPIInt tag; 243 MPI_Status status; 244 PetscBool aij; 245 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 246 247 PetscFunctionBegin; 248 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 249 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 250 if (!rank) { 251 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 252 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 253 } 254 if (reuse == MAT_INITIAL_MATRIX) { 255 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 256 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 257 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 258 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 259 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 260 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 261 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 262 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 263 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 264 265 rowners[0] = 0; 266 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 267 rstart = rowners[rank]; 268 rend = rowners[rank+1]; 269 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 270 if (!rank) { 271 gmata = (Mat_SeqAIJ*) gmat->data; 272 /* send row lengths to all processors */ 273 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 274 for (i=1; i<size; i++) { 275 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 276 } 277 /* determine number diagonal and off-diagonal counts */ 278 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 279 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 280 jj = 0; 281 for (i=0; i<m; i++) { 282 for (j=0; j<dlens[i]; j++) { 283 if (gmata->j[jj] < rstart) ld[i]++; 284 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 285 jj++; 286 } 287 } 288 /* send column indices to other processes */ 289 for (i=1; i<size; i++) { 290 nz = 
gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 291 ierr = MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 292 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 293 } 294 295 /* send numerical values to other processes */ 296 for (i=1; i<size; i++) { 297 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 298 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 299 } 300 gmataa = gmata->a; 301 gmataj = gmata->j; 302 303 } else { 304 /* receive row lengths */ 305 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 306 /* receive column indices */ 307 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 308 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 309 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 310 /* determine number diagonal and off-diagonal counts */ 311 ierr = PetscMemzero(olens,m*sizeof(PetscInt));CHKERRQ(ierr); 312 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 313 jj = 0; 314 for (i=0; i<m; i++) { 315 for (j=0; j<dlens[i]; j++) { 316 if (gmataj[jj] < rstart) ld[i]++; 317 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 318 jj++; 319 } 320 } 321 /* receive numerical values */ 322 ierr = PetscMemzero(gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 323 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 324 } 325 /* set preallocation */ 326 for (i=0; i<m; i++) { 327 dlens[i] -= olens[i]; 328 } 329 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 330 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 331 332 for (i=0; i<m; i++) { 333 dlens[i] += olens[i]; 334 } 335 cnt = 0; 336 for (i=0; i<m; i++) { 337 row = rstart + i; 338 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 339 cnt += dlens[i]; 340 } 341 if (rank) { 342 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 343 } 344 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 345 ierr = PetscFree(rowners);CHKERRQ(ierr); 346 347 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 348 349 *inmat = mat; 350 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 351 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 352 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 353 mat = *inmat; 354 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 355 if (!rank) { 356 /* send numerical values to other processes */ 357 gmata = (Mat_SeqAIJ*) gmat->data; 358 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 359 gmataa = gmata->a; 360 for (i=1; i<size; i++) { 361 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 362 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 363 } 364 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 365 } else { 366 /* receive numerical values from process 0*/ 367 nz = Ad->nz + Ao->nz; 368 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 369 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 370 } 371 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 372 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 373 ad = Ad->a; 374 ao = Ao->a; 375 if (mat->rmap->n) { 376 i = 0; 377 nz = ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 378 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; 
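/* The incoming values (read in place on process 0, received via MPI elsewhere) are ordered by
   global column within each row: first the ld[i] off-diagonal entries whose columns lie to the
   left of this process's diagonal block, then the diagonal-block entries, then the remaining
   off-diagonal entries to the right.  The copies in this block peel those runs off alternately
   into Ao->a and Ad->a. */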
gmataa += nz; 379 } 380 for (i=1; i<mat->rmap->n; i++) { 381 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ao += nz; gmataa += nz; 382 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscMemcpy(ad,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); ad += nz; gmataa += nz; 383 } 384 i--; 385 if (mat->rmap->n) { 386 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscMemcpy(ao,gmataa,nz*sizeof(PetscScalar));CHKERRQ(ierr); 387 } 388 if (rank) { 389 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 390 } 391 } 392 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 393 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 394 PetscFunctionReturn(0); 395 } 396 397 /* 398 Local utility routine that creates a mapping from the global column 399 number to the local number in the off-diagonal part of the local 400 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 401 a slightly higher hash table cost; without it it is not scalable (each processor 402 has an order N integer array but is fast to acess. 403 */ 404 #undef __FUNCT__ 405 #define __FUNCT__ "MatCreateColmap_MPIAIJ_Private" 406 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 407 { 408 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 409 PetscErrorCode ierr; 410 PetscInt n = aij->B->cmap->n,i; 411 412 PetscFunctionBegin; 413 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 414 #if defined(PETSC_USE_CTABLE) 415 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 416 for (i=0; i<n; i++) { 417 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 418 } 419 #else 420 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 421 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 422 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 423 #endif 424 PetscFunctionReturn(0); 425 } 426 427 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 428 { \ 429 if (col <= lastcol1) low1 = 0; \ 430 else high1 = nrow1; \ 431 lastcol1 = col;\ 432 while (high1-low1 > 5) { \ 433 t = (low1+high1)/2; \ 434 if (rp1[t] > col) high1 = t; \ 435 else low1 = t; \ 436 } \ 437 for (_i=low1; _i<high1; _i++) { \ 438 if (rp1[_i] > col) break; \ 439 if (rp1[_i] == col) { \ 440 if (addv == ADD_VALUES) ap1[_i] += value; \ 441 else ap1[_i] = value; \ 442 goto a_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 446 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 447 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 448 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 449 N = nrow1++ - 1; a->nz++; high1++; \ 450 /* shift up all the later entries in this row */ \ 451 for (ii=N; ii>=_i; ii--) { \ 452 rp1[ii+1] = rp1[ii]; \ 453 ap1[ii+1] = ap1[ii]; \ 454 } \ 455 rp1[_i] = col; \ 456 ap1[_i] = value; \ 457 A->nonzerostate++;\ 458 a_noinsert: ; \ 459 ailen[row] = nrow1; \ 460 } 461 462 463 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 464 { \ 465 if (col <= lastcol2) low2 = 0; \ 466 else high2 = nrow2; \ 467 lastcol2 = col; \ 468 while (high2-low2 > 5) { \ 469 t = (low2+high2)/2; \ 470 if (rp2[t] > col) high2 = t; \ 471 else low2 = t; \ 472 } \ 473 for (_i=low2; _i<high2; _i++) { \ 
474 if (rp2[_i] > col) break; \ 475 if (rp2[_i] == col) { \ 476 if (addv == ADD_VALUES) ap2[_i] += value; \ 477 else ap2[_i] = value; \ 478 goto b_noinsert; \ 479 } \ 480 } \ 481 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 482 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 483 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 484 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 485 N = nrow2++ - 1; b->nz++; high2++; \ 486 /* shift up all the later entries in this row */ \ 487 for (ii=N; ii>=_i; ii--) { \ 488 rp2[ii+1] = rp2[ii]; \ 489 ap2[ii+1] = ap2[ii]; \ 490 } \ 491 rp2[_i] = col; \ 492 ap2[_i] = value; \ 493 B->nonzerostate++; \ 494 b_noinsert: ; \ 495 bilen[row] = nrow2; \ 496 } 497 498 #undef __FUNCT__ 499 #define __FUNCT__ "MatSetValuesRow_MPIAIJ" 500 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 501 { 502 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 503 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 504 PetscErrorCode ierr; 505 PetscInt l,*garray = mat->garray,diag; 506 507 PetscFunctionBegin; 508 /* code only works for square matrices A */ 509 510 /* find size of row to the left of the diagonal part */ 511 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 512 row = row - diag; 513 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 514 if (garray[b->j[b->i[row]+l]] > diag) break; 515 } 516 ierr = PetscMemcpy(b->a+b->i[row],v,l*sizeof(PetscScalar));CHKERRQ(ierr); 517 518 /* diagonal part */ 519 ierr = PetscMemcpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row])*sizeof(PetscScalar));CHKERRQ(ierr); 520 521 /* right of diagonal part */ 522 ierr = PetscMemcpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],(b->i[row+1]-b->i[row]-l)*sizeof(PetscScalar));CHKERRQ(ierr); 523 PetscFunctionReturn(0); 524 } 525 526 #undef __FUNCT__ 527 #define __FUNCT__ "MatSetValues_MPIAIJ" 528 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 529 { 530 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 531 PetscScalar value; 532 PetscErrorCode ierr; 533 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 540 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 541 MatScalar *aa = a->a; 542 PetscBool ignorezeroentries = a->ignorezeroentries; 543 Mat B = aij->B; 544 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 545 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 546 MatScalar *ba = b->a; 547 548 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 549 PetscInt nonew; 550 MatScalar *ap1,*ap2; 551 552 PetscFunctionBegin; 553 for (i=0; i<m; i++) { 554 if (im[i] < 0) continue; 555 #if defined(PETSC_USE_DEBUG) 556 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 557 #endif 558 if (im[i] >= rstart && im[i] < rend) { 559 row = im[i] - rstart; 560 lastcol1 = -1; 561 rp1 = aj + ai[row]; 562 ap1 = aa + ai[row]; 563 rmax1 = aimax[row]; 564 nrow1 = ailen[row]; 565 low1 = 0; 566 
high1 = nrow1; 567 lastcol2 = -1; 568 rp2 = bj + bi[row]; 569 ap2 = ba + bi[row]; 570 rmax2 = bimax[row]; 571 nrow2 = bilen[row]; 572 low2 = 0; 573 high2 = nrow2; 574 575 for (j=0; j<n; j++) { 576 if (roworiented) value = v[i*n+j]; 577 else value = v[i+j*m]; 578 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 579 if (in[j] >= cstart && in[j] < cend) { 580 col = in[j] - cstart; 581 nonew = a->nonew; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 614 } else col = in[j]; 615 nonew = b->nonew; 616 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 617 } 618 } 619 } else { 620 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 621 if (!aij->donotstash) { 622 mat->assembled = PETSC_FALSE; 623 if (roworiented) { 624 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 625 } else { 626 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 627 } 628 } 629 } 630 } 631 PetscFunctionReturn(0); 632 } 633 634 #undef __FUNCT__ 635 #define __FUNCT__ "MatGetValues_MPIAIJ" 636 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 637 { 638 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 639 PetscErrorCode ierr; 640 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 641 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 642 643 PetscFunctionBegin; 644 for (i=0; i<m; i++) { 645 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 646 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 647 if (idxm[i] >= rstart && idxm[i] < rend) { 648 row = idxm[i] - rstart; 649 for (j=0; j<n; j++) { 650 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 651 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 652 if (idxn[j] >= cstart && idxn[j] < cend) 
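/* the requested global column falls inside this process's diagonal block A; otherwise it is
   translated below through colmap (a PetscTable when PETSC_USE_CTABLE is defined) and garray
   into a local column of the off-diagonal block B, and columns with no local entry return 0.0 */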
{ 653 col = idxn[j] - cstart; 654 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 655 } else { 656 if (!aij->colmap) { 657 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 658 } 659 #if defined(PETSC_USE_CTABLE) 660 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 661 col--; 662 #else 663 col = aij->colmap[idxn[j]] - 1; 664 #endif 665 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 666 else { 667 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 668 } 669 } 670 } 671 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 672 } 673 PetscFunctionReturn(0); 674 } 675 676 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 677 678 #undef __FUNCT__ 679 #define __FUNCT__ "MatAssemblyBegin_MPIAIJ" 680 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 681 { 682 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 683 PetscErrorCode ierr; 684 PetscInt nstash,reallocs; 685 686 PetscFunctionBegin; 687 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 688 689 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 690 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 691 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 692 PetscFunctionReturn(0); 693 } 694 695 #undef __FUNCT__ 696 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ" 697 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 698 { 699 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 700 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 701 PetscErrorCode ierr; 702 PetscMPIInt n; 703 PetscInt i,j,rstart,ncols,flg; 704 PetscInt *row,*col; 705 PetscBool other_disassembled; 706 PetscScalar *val; 707 708 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 709 710 PetscFunctionBegin; 711 if (!aij->donotstash && !mat->nooffprocentries) { 712 while (1) { 713 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 714 if (!flg) break; 715 716 for (i=0; i<n; ) { 717 /* Now identify the consecutive vals belonging to the same row */ 718 for (j=i,rstart=row[j]; j<n; j++) { 719 if (row[j] != rstart) break; 720 } 721 if (j < n) ncols = j-i; 722 else ncols = n-i; 723 /* Now assemble all these values with a single function call */ 724 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 725 726 i = j; 727 } 728 } 729 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 730 } 731 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 732 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 733 734 /* determine if any processor has disassembled, if so we must 735 also disassemble ourselfs, in order that we may reassemble. 
*/ 736 /* 737 if nonzero structure of submatrix B cannot change then we know that 738 no processor disassembled thus we can skip this stuff 739 */ 740 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 741 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 742 if (mat->was_assembled && !other_disassembled) { 743 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 744 } 745 } 746 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 747 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 748 } 749 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 750 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 751 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 752 753 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 754 755 aij->rowvalues = 0; 756 757 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 758 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 759 760 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 761 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 762 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 763 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 764 } 765 PetscFunctionReturn(0); 766 } 767 768 #undef __FUNCT__ 769 #define __FUNCT__ "MatZeroEntries_MPIAIJ" 770 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 771 { 772 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 773 PetscErrorCode ierr; 774 775 PetscFunctionBegin; 776 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 777 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 778 PetscFunctionReturn(0); 779 } 780 781 #undef __FUNCT__ 782 #define __FUNCT__ "MatZeroRows_MPIAIJ" 783 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 784 { 785 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 786 PetscInt *lrows; 787 PetscInt r, len; 788 PetscErrorCode ierr; 789 790 PetscFunctionBegin; 791 /* get locally owned rows */ 792 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 793 /* fix right hand side if needed */ 794 if (x && b) { 795 const PetscScalar *xx; 796 PetscScalar *bb; 797 798 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 799 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 800 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 801 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 802 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 803 } 804 /* Must zero l->B before l->A because the (diag) case below may put values into l->B*/ 805 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 806 if (A->congruentlayouts == -1) { /* first time we compare rows and cols layouts */ 807 PetscBool cong; 808 ierr = PetscLayoutCompare(A->rmap,A->cmap,&cong);CHKERRQ(ierr); 809 if (cong) A->congruentlayouts = 1; 810 else A->congruentlayouts = 0; 811 } 812 if ((diag != 0.0) && A->congruentlayouts) { 813 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 814 } else if (diag != 0.0) { 815 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 816 if (((Mat_SeqAIJ *) mat->A->data)->nonew) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options\nMAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 817 for (r = 0; r < len; ++r) { 818 const 
PetscInt row = lrows[r] + A->rmap->rstart; 819 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 820 } 821 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 822 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 823 } else { 824 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 825 } 826 ierr = PetscFree(lrows);CHKERRQ(ierr); 827 828 /* only change matrix nonzero state if pattern was allowed to be changed */ 829 if (!((Mat_SeqAIJ*)(mat->A->data))->keepnonzeropattern) { 830 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 831 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 832 } 833 PetscFunctionReturn(0); 834 } 835 836 #undef __FUNCT__ 837 #define __FUNCT__ "MatZeroRowsColumns_MPIAIJ" 838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 839 { 840 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 841 PetscErrorCode ierr; 842 PetscMPIInt n = A->rmap->n; 843 PetscInt i,j,r,m,p = 0,len = 0; 844 PetscInt *lrows,*owners = A->rmap->range; 845 PetscSFNode *rrows; 846 PetscSF sf; 847 const PetscScalar *xx; 848 PetscScalar *bb,*mask; 849 Vec xmask,lmask; 850 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)l->B->data; 851 const PetscInt *aj, *ii,*ridx; 852 PetscScalar *aa; 853 854 PetscFunctionBegin; 855 /* Create SF where leaves are input rows and roots are owned rows */ 856 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 857 for (r = 0; r < n; ++r) lrows[r] = -1; 858 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 859 for (r = 0; r < N; ++r) { 860 const PetscInt idx = rows[r]; 861 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 862 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 863 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 864 } 865 rrows[r].rank = p; 866 rrows[r].index = rows[r] - owners[p]; 867 } 868 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 869 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 870 /* Collect flags for rows to be zeroed */ 871 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 872 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 873 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 874 /* Compress and put in row numbers */ 875 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 876 /* zero diagonal part of matrix */ 877 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 878 /* handle off diagonal part of matrix */ 879 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 880 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 881 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 882 for (i=0; i<len; i++) bb[lrows[i]] = 1; 883 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 884 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 885 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 886 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 887 if (x) { 888 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 889 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 890 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 891 ierr = 
VecGetArray(b,&bb);CHKERRQ(ierr); 892 } 893 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 894 /* remove zeroed rows of off diagonal matrix */ 895 ii = aij->i; 896 for (i=0; i<len; i++) { 897 ierr = PetscMemzero(aij->a + ii[lrows[i]],(ii[lrows[i]+1] - ii[lrows[i]])*sizeof(PetscScalar));CHKERRQ(ierr); 898 } 899 /* loop over all elements of off process part of matrix zeroing removed columns*/ 900 if (aij->compressedrow.use) { 901 m = aij->compressedrow.nrows; 902 ii = aij->compressedrow.i; 903 ridx = aij->compressedrow.rindex; 904 for (i=0; i<m; i++) { 905 n = ii[i+1] - ii[i]; 906 aj = aij->j + ii[i]; 907 aa = aij->a + ii[i]; 908 909 for (j=0; j<n; j++) { 910 if (PetscAbsScalar(mask[*aj])) { 911 if (b) bb[*ridx] -= *aa*xx[*aj]; 912 *aa = 0.0; 913 } 914 aa++; 915 aj++; 916 } 917 ridx++; 918 } 919 } else { /* do not use compressed row format */ 920 m = l->B->rmap->n; 921 for (i=0; i<m; i++) { 922 n = ii[i+1] - ii[i]; 923 aj = aij->j + ii[i]; 924 aa = aij->a + ii[i]; 925 for (j=0; j<n; j++) { 926 if (PetscAbsScalar(mask[*aj])) { 927 if (b) bb[i] -= *aa*xx[*aj]; 928 *aa = 0.0; 929 } 930 aa++; 931 aj++; 932 } 933 } 934 } 935 if (x) { 936 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 937 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 938 } 939 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 940 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 943 /* only change matrix nonzero state if pattern was allowed to be changed */ 944 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 945 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 946 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 947 } 948 PetscFunctionReturn(0); 949 } 950 951 #undef __FUNCT__ 952 #define __FUNCT__ "MatMult_MPIAIJ" 953 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 954 { 955 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 956 PetscErrorCode ierr; 957 PetscInt nt; 958 959 PetscFunctionBegin; 960 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 961 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 962 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 964 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 965 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 966 PetscFunctionReturn(0); 967 } 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "MatMultDiagonalBlock_MPIAIJ" 971 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 972 { 973 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 974 PetscErrorCode ierr; 975 976 PetscFunctionBegin; 977 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 978 PetscFunctionReturn(0); 979 } 980 981 #undef __FUNCT__ 982 #define __FUNCT__ "MatMultAdd_MPIAIJ" 983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 984 { 985 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 986 PetscErrorCode ierr; 987 988 PetscFunctionBegin; 989 ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 990 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 991 ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 993 PetscFunctionReturn(0); 994 } 995 996 #undef __FUNCT__ 997 #define __FUNCT__ "MatMultTranspose_MPIAIJ" 998 PetscErrorCode 
MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 999 { 1000 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1001 PetscErrorCode ierr; 1002 PetscBool merged; 1003 1004 PetscFunctionBegin; 1005 ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr); 1006 /* do nondiagonal part */ 1007 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1008 if (!merged) { 1009 /* send it on its way */ 1010 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1011 /* do local part */ 1012 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1013 /* receive remote parts: note this assumes the values are not actually */ 1014 /* added in yy until the next line, */ 1015 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1016 } else { 1017 /* do local part */ 1018 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1019 /* send it on its way */ 1020 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1021 /* values actually were received in the Begin() but we need to call this nop */ 1022 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1023 } 1024 PetscFunctionReturn(0); 1025 } 1026 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatIsTranspose_MPIAIJ" 1029 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1030 { 1031 MPI_Comm comm; 1032 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1033 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1034 IS Me,Notme; 1035 PetscErrorCode ierr; 1036 PetscInt M,N,first,last,*notme,i; 1037 PetscMPIInt size; 1038 1039 PetscFunctionBegin; 1040 /* Easy test: symmetric diagonal block */ 1041 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1042 ierr = MatIsTranspose(Adia,Bdia,tol,f);CHKERRQ(ierr); 1043 if (!*f) PetscFunctionReturn(0); 1044 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1045 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1046 if (size == 1) PetscFunctionReturn(0); 1047 1048 /* Hard test: off-diagonal block. This takes a MatGetSubMatrix. 
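      Each process pulls out two sequential submatrices: Aoff = Amat(owned rows, columns it does not own)
      and Boff = Bmat(rows it does not own, owned columns).  Together with the diagonal-block test above,
      requiring Aoff to be the transpose of Boff on every process verifies that Bmat is (to the given
      tolerance) the transpose of Amat.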
*/ 1049 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1050 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1051 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1052 for (i=0; i<first; i++) notme[i] = i; 1053 for (i=last; i<M; i++) notme[i-last+first] = i; 1054 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1055 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1056 ierr = MatGetSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1057 Aoff = Aoffs[0]; 1058 ierr = MatGetSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1059 Boff = Boffs[0]; 1060 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1061 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1062 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1063 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1064 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1065 ierr = PetscFree(notme);CHKERRQ(ierr); 1066 PetscFunctionReturn(0); 1067 } 1068 1069 #undef __FUNCT__ 1070 #define __FUNCT__ "MatMultTransposeAdd_MPIAIJ" 1071 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1072 { 1073 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1074 PetscErrorCode ierr; 1075 1076 PetscFunctionBegin; 1077 /* do nondiagonal part */ 1078 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1079 /* send it on its way */ 1080 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1081 /* do local part */ 1082 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1083 /* receive remote parts */ 1084 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 /* 1089 This only works correctly for square matrices where the subblock A->A is the 1090 diagonal block 1091 */ 1092 #undef __FUNCT__ 1093 #define __FUNCT__ "MatGetDiagonal_MPIAIJ" 1094 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1095 { 1096 PetscErrorCode ierr; 1097 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1098 1099 PetscFunctionBegin; 1100 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1101 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1102 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 #undef __FUNCT__ 1107 #define __FUNCT__ "MatScale_MPIAIJ" 1108 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1109 { 1110 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1111 PetscErrorCode ierr; 1112 1113 PetscFunctionBegin; 1114 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1115 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 #undef __FUNCT__ 1120 #define __FUNCT__ "MatDestroy_MPIAIJ" 1121 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1122 { 1123 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1124 PetscErrorCode ierr; 1125 1126 PetscFunctionBegin; 1127 #if defined(PETSC_USE_LOG) 1128 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1129 #endif 1130 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1131 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1132 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1133 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1134 #if defined(PETSC_USE_CTABLE) 1135 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1136 #else 1137 ierr = 
PetscFree(aij->colmap);CHKERRQ(ierr); 1138 #endif 1139 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1140 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1141 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1142 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1143 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1144 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1145 1146 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1147 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1148 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1149 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1150 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1151 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1152 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1153 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1154 #if defined(PETSC_HAVE_ELEMENTAL) 1155 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1156 #endif 1157 #if defined(PETSC_HAVE_HYPRE) 1158 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1159 #endif 1160 PetscFunctionReturn(0); 1161 } 1162 1163 #undef __FUNCT__ 1164 #define __FUNCT__ "MatView_MPIAIJ_Binary" 1165 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1170 PetscErrorCode ierr; 1171 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1172 int fd; 1173 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1174 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1175 PetscScalar *column_values; 1176 PetscInt message_count,flowcontrolcount; 1177 FILE *file; 1178 1179 PetscFunctionBegin; 1180 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1181 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1182 nz = A->nz + B->nz; 1183 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1184 if (!rank) { 1185 header[0] = MAT_FILE_CLASSID; 1186 header[1] = mat->rmap->N; 1187 header[2] = mat->cmap->N; 1188 1189 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1190 ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1191 /* get largest number of rows any processor has */ 1192 rlen = mat->rmap->n; 1193 range = mat->rmap->range; 1194 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1195 } else { 1196 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1197 rlen = mat->rmap->n; 1198 } 1199 1200 /* load up the local row counts */ 1201 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1202 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1203 1204 /* store the row lengths to the file */ 1205 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1206 if (!rank) { 1207 ierr = 
PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1208 for (i=1; i<size; i++) { 1209 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1210 rlen = range[i+1] - range[i]; 1211 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1212 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1213 } 1214 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1215 } else { 1216 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1217 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1218 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1219 } 1220 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1221 1222 /* load up the local column indices */ 1223 nzmax = nz; /* th processor needs space a largest processor needs */ 1224 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1225 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1226 cnt = 0; 1227 for (i=0; i<mat->rmap->n; i++) { 1228 for (j=B->i[i]; j<B->i[i+1]; j++) { 1229 if ((col = garray[B->j[j]]) > cstart) break; 1230 column_indices[cnt++] = col; 1231 } 1232 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1233 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1234 } 1235 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1236 1237 /* store the column indices to the file */ 1238 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1239 if (!rank) { 1240 MPI_Status status; 1241 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1242 for (i=1; i<size; i++) { 1243 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1244 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1245 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1246 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1247 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1248 } 1249 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1250 } else { 1251 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1252 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1253 ierr = MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1254 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1255 } 1256 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1257 1258 /* load up the local column values */ 1259 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1260 cnt = 0; 1261 for (i=0; i<mat->rmap->n; i++) { 1262 for (j=B->i[i]; j<B->i[i+1]; j++) { 1263 if (garray[B->j[j]] > cstart) break; 1264 column_values[cnt++] = B->a[j]; 1265 } 1266 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1267 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1268 } 1269 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = 
%D",cnt,A->nz+B->nz); 1270 1271 /* store the column values to the file */ 1272 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1273 if (!rank) { 1274 MPI_Status status; 1275 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1276 for (i=1; i<size; i++) { 1277 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1278 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1279 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1280 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1281 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1282 } 1283 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1284 } else { 1285 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1286 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1287 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1288 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1289 } 1290 ierr = PetscFree(column_values);CHKERRQ(ierr); 1291 1292 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1293 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1294 PetscFunctionReturn(0); 1295 } 1296 1297 #include <petscdraw.h> 1298 #undef __FUNCT__ 1299 #define __FUNCT__ "MatView_MPIAIJ_ASCIIorDraworSocket" 1300 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1301 { 1302 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1303 PetscErrorCode ierr; 1304 PetscMPIInt rank = aij->rank,size = aij->size; 1305 PetscBool isdraw,iascii,isbinary; 1306 PetscViewer sviewer; 1307 PetscViewerFormat format; 1308 1309 PetscFunctionBegin; 1310 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1311 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1312 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1313 if (iascii) { 1314 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1315 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1316 MatInfo info; 1317 PetscBool inodes; 1318 1319 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1320 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1321 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1322 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1323 if (!inodes) { 1324 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, not using I-node routines\n", 1325 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1326 } else { 1327 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %D, using I-node routines\n", 1328 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 1329 } 1330 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1331 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1332 ierr = 
MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1333 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1334 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1335 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1336 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1337 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1338 PetscFunctionReturn(0); 1339 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1340 PetscInt inodecount,inodelimit,*inodes; 1341 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1342 if (inodes) { 1343 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1344 } else { 1345 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1346 } 1347 PetscFunctionReturn(0); 1348 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1349 PetscFunctionReturn(0); 1350 } 1351 } else if (isbinary) { 1352 if (size == 1) { 1353 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1354 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1355 } else { 1356 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1357 } 1358 PetscFunctionReturn(0); 1359 } else if (isdraw) { 1360 PetscDraw draw; 1361 PetscBool isnull; 1362 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1363 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1364 if (isnull) PetscFunctionReturn(0); 1365 } 1366 1367 { 1368 /* assemble the entire matrix onto first processor. */ 1369 Mat A; 1370 Mat_SeqAIJ *Aloc; 1371 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,*ai,*aj,row,*cols,i,*ct; 1372 MatScalar *a; 1373 1374 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 1375 if (!rank) { 1376 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 1377 } else { 1378 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 1379 } 1380 /* This is just a temporary matrix, so explicitly using MATMPIAIJ is probably best */ 1381 ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr); 1382 ierr = MatMPIAIJSetPreallocation(A,0,NULL,0,NULL);CHKERRQ(ierr); 1383 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1384 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 1385 1386 /* copy over the A part */ 1387 Aloc = (Mat_SeqAIJ*)aij->A->data; 1388 m = aij->A->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1389 row = mat->rmap->rstart; 1390 for (i=0; i<ai[m]; i++) aj[i] += mat->cmap->rstart; 1391 for (i=0; i<m; i++) { 1392 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],aj,a,INSERT_VALUES);CHKERRQ(ierr); 1393 row++; 1394 a += ai[i+1]-ai[i]; aj += ai[i+1]-ai[i]; 1395 } 1396 aj = Aloc->j; 1397 for (i=0; i<ai[m]; i++) aj[i] -= mat->cmap->rstart; 1398 1399 /* copy over the B part */ 1400 Aloc = (Mat_SeqAIJ*)aij->B->data; 1401 m = aij->B->rmap->n; ai = Aloc->i; aj = Aloc->j; a = Aloc->a; 1402 row = mat->rmap->rstart; 1403 ierr = PetscMalloc1(ai[m]+1,&cols);CHKERRQ(ierr); 1404 ct = cols; 1405 for (i=0; i<ai[m]; i++) cols[i] = aij->garray[aj[i]]; 1406 for (i=0; i<m; i++) { 1407 ierr = MatSetValues(A,1,&row,ai[i+1]-ai[i],cols,a,INSERT_VALUES);CHKERRQ(ierr); 1408 row++; 1409 a += ai[i+1]-ai[i]; cols += ai[i+1]-ai[i]; 1410 } 1411 ierr = PetscFree(ct);CHKERRQ(ierr); 1412 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 ierr = 
MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1414 /* 1415 Everyone has to call to draw the matrix since the graphics waits are 1416 synchronized across all processors that share the PetscDraw object 1417 */ 1418 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1419 if (!rank) { 1420 ierr = PetscObjectSetName((PetscObject)((Mat_MPIAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1421 ierr = MatView_SeqAIJ(((Mat_MPIAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr); 1422 } 1423 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1424 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1425 ierr = MatDestroy(&A);CHKERRQ(ierr); 1426 } 1427 PetscFunctionReturn(0); 1428 } 1429 1430 #undef __FUNCT__ 1431 #define __FUNCT__ "MatView_MPIAIJ" 1432 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1433 { 1434 PetscErrorCode ierr; 1435 PetscBool iascii,isdraw,issocket,isbinary; 1436 1437 PetscFunctionBegin; 1438 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1439 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1440 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1441 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1442 if (iascii || isdraw || isbinary || issocket) { 1443 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1444 } 1445 PetscFunctionReturn(0); 1446 } 1447 1448 #undef __FUNCT__ 1449 #define __FUNCT__ "MatSOR_MPIAIJ" 1450 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1451 { 1452 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1453 PetscErrorCode ierr; 1454 Vec bb1 = 0; 1455 PetscBool hasop; 1456 1457 PetscFunctionBegin; 1458 if (flag == SOR_APPLY_UPPER) { 1459 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1460 PetscFunctionReturn(0); 1461 } 1462 1463 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1464 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1465 } 1466 1467 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1468 if (flag & SOR_ZERO_INITIAL_GUESS) { 1469 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1470 its--; 1471 } 1472 1473 while (its--) { 1474 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1475 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1476 1477 /* update rhs: bb1 = bb - B*x */ 1478 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1479 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1480 1481 /* local sweep */ 1482 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1483 } 1484 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1485 if (flag & SOR_ZERO_INITIAL_GUESS) { 1486 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1487 its--; 1488 } 1489 while (its--) { 1490 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1491 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1492 1493 /* update rhs: bb1 = bb - B*x */ 1494 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1495 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1496 1497 /* local sweep 
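(a forward SOR sweep of the diagonal block A with the updated right-hand side bb1)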
*/ 1498 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1499 } 1500 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1501 if (flag & SOR_ZERO_INITIAL_GUESS) { 1502 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1503 its--; 1504 } 1505 while (its--) { 1506 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1507 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1508 1509 /* update rhs: bb1 = bb - B*x */ 1510 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1511 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1512 1513 /* local sweep */ 1514 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1515 } 1516 } else if (flag & SOR_EISENSTAT) { 1517 Vec xx1; 1518 1519 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1520 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1521 1522 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1523 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1524 if (!mat->diag) { 1525 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1526 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1527 } 1528 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1529 if (hasop) { 1530 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1531 } else { 1532 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1533 } 1534 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1535 1536 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1537 1538 /* local sweep */ 1539 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1540 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1541 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1542 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1543 1544 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1545 1546 matin->factorerrortype = mat->A->factorerrortype; 1547 PetscFunctionReturn(0); 1548 } 1549 1550 #undef __FUNCT__ 1551 #define __FUNCT__ "MatPermute_MPIAIJ" 1552 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1553 { 1554 Mat aA,aB,Aperm; 1555 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1556 PetscScalar *aa,*ba; 1557 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1558 PetscSF rowsf,sf; 1559 IS parcolp = NULL; 1560 PetscBool done; 1561 PetscErrorCode ierr; 1562 1563 PetscFunctionBegin; 1564 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1565 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1566 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1567 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1568 1569 /* Invert row permutation to find out where my rows should go */ 1570 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1571 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1572 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1573 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1574 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1575 ierr = 
PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1576 1577 /* Invert column permutation to find out where my columns should go */ 1578 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1579 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1580 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1581 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1582 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1583 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1584 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1585 1586 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1587 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1588 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1589 1590 /* Find out where my gcols should go */ 1591 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1592 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1593 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1594 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1595 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1596 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1597 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1598 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1599 1600 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1601 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1602 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1603 for (i=0; i<m; i++) { 1604 PetscInt row = rdest[i],rowner; 1605 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1606 for (j=ai[i]; j<ai[i+1]; j++) { 1607 PetscInt cowner,col = cdest[aj[j]]; 1608 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1609 if (rowner == cowner) dnnz[i]++; 1610 else onnz[i]++; 1611 } 1612 for (j=bi[i]; j<bi[i+1]; j++) { 1613 PetscInt cowner,col = gcdest[bj[j]]; 1614 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1615 if (rowner == cowner) dnnz[i]++; 1616 else onnz[i]++; 1617 } 1618 } 1619 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1620 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1621 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1622 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1623 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1624 1625 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1626 ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1627 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1628 for (i=0; i<m; i++) { 1629 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1630 PetscInt j0,rowlen; 1631 rowlen = ai[i+1] - ai[i]; 1632 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1633 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1634 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1635 } 1636 rowlen = bi[i+1] - bi[i]; 1637 for (j0=j=0; j<rowlen; j0=j) { 1638 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1639 ierr = 
MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1640 } 1641 } 1642 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1643 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1644 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1645 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1646 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1647 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1648 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1649 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1650 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1651 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1652 *B = Aperm; 1653 PetscFunctionReturn(0); 1654 } 1655 1656 #undef __FUNCT__ 1657 #define __FUNCT__ "MatGetGhosts_MPIAIJ" 1658 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1659 { 1660 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1661 PetscErrorCode ierr; 1662 1663 PetscFunctionBegin; 1664 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1665 if (ghosts) *ghosts = aij->garray; 1666 PetscFunctionReturn(0); 1667 } 1668 1669 #undef __FUNCT__ 1670 #define __FUNCT__ "MatGetInfo_MPIAIJ" 1671 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1672 { 1673 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1674 Mat A = mat->A,B = mat->B; 1675 PetscErrorCode ierr; 1676 PetscReal isend[5],irecv[5]; 1677 1678 PetscFunctionBegin; 1679 info->block_size = 1.0; 1680 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1681 1682 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1683 isend[3] = info->memory; isend[4] = info->mallocs; 1684 1685 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1686 1687 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1688 isend[3] += info->memory; isend[4] += info->mallocs; 1689 if (flag == MAT_LOCAL) { 1690 info->nz_used = isend[0]; 1691 info->nz_allocated = isend[1]; 1692 info->nz_unneeded = isend[2]; 1693 info->memory = isend[3]; 1694 info->mallocs = isend[4]; 1695 } else if (flag == MAT_GLOBAL_MAX) { 1696 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1697 1698 info->nz_used = irecv[0]; 1699 info->nz_allocated = irecv[1]; 1700 info->nz_unneeded = irecv[2]; 1701 info->memory = irecv[3]; 1702 info->mallocs = irecv[4]; 1703 } else if (flag == MAT_GLOBAL_SUM) { 1704 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1705 1706 info->nz_used = irecv[0]; 1707 info->nz_allocated = irecv[1]; 1708 info->nz_unneeded = irecv[2]; 1709 info->memory = irecv[3]; 1710 info->mallocs = irecv[4]; 1711 } 1712 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1713 info->fill_ratio_needed = 0; 1714 info->factor_mallocs = 0; 1715 PetscFunctionReturn(0); 1716 } 1717 1718 #undef __FUNCT__ 1719 #define __FUNCT__ "MatSetOption_MPIAIJ" 1720 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1721 { 1722 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1723 PetscErrorCode ierr; 1724 1725 PetscFunctionBegin; 1726 switch (op) { 1727 case MAT_NEW_NONZERO_LOCATIONS: 1728 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1729 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1730 case MAT_KEEP_NONZERO_PATTERN: 1731 case MAT_NEW_NONZERO_LOCATION_ERR: 1732 case MAT_USE_INODES: 1733 case MAT_IGNORE_ZERO_ENTRIES: 1734 
MatCheckPreallocated(A,1); 1735 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1736 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1737 break; 1738 case MAT_ROW_ORIENTED: 1739 MatCheckPreallocated(A,1); 1740 a->roworiented = flg; 1741 1742 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1743 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1744 break; 1745 case MAT_NEW_DIAGONALS: 1746 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1747 break; 1748 case MAT_IGNORE_OFF_PROC_ENTRIES: 1749 a->donotstash = flg; 1750 break; 1751 case MAT_SPD: 1752 A->spd_set = PETSC_TRUE; 1753 A->spd = flg; 1754 if (flg) { 1755 A->symmetric = PETSC_TRUE; 1756 A->structurally_symmetric = PETSC_TRUE; 1757 A->symmetric_set = PETSC_TRUE; 1758 A->structurally_symmetric_set = PETSC_TRUE; 1759 } 1760 break; 1761 case MAT_SYMMETRIC: 1762 MatCheckPreallocated(A,1); 1763 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1764 break; 1765 case MAT_STRUCTURALLY_SYMMETRIC: 1766 MatCheckPreallocated(A,1); 1767 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1768 break; 1769 case MAT_HERMITIAN: 1770 MatCheckPreallocated(A,1); 1771 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1772 break; 1773 case MAT_SYMMETRY_ETERNAL: 1774 MatCheckPreallocated(A,1); 1775 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1776 break; 1777 case MAT_SUBMAT_SINGLEIS: 1778 A->submat_singleis = flg; 1779 break; 1780 default: 1781 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1782 } 1783 PetscFunctionReturn(0); 1784 } 1785 1786 #undef __FUNCT__ 1787 #define __FUNCT__ "MatGetRow_MPIAIJ" 1788 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1789 { 1790 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1791 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1792 PetscErrorCode ierr; 1793 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1794 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1795 PetscInt *cmap,*idx_p; 1796 1797 PetscFunctionBegin; 1798 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1799 mat->getrowactive = PETSC_TRUE; 1800 1801 if (!mat->rowvalues && (idx || v)) { 1802 /* 1803 allocate enough space to hold information from the longest row. 
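The work arrays rowvalues and rowindices are sized to the maximum, over the local rows, of the combined number of nonzeros in the diagonal (A) and off-diagonal (B) parts, so one pair of buffers can serve every subsequent MatGetRow() call on this matrix.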
1804 */ 1805 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1806 PetscInt max = 1,tmp; 1807 for (i=0; i<matin->rmap->n; i++) { 1808 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1809 if (max < tmp) max = tmp; 1810 } 1811 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1812 } 1813 1814 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1815 lrow = row - rstart; 1816 1817 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1818 if (!v) {pvA = 0; pvB = 0;} 1819 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1820 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1821 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1822 nztot = nzA + nzB; 1823 1824 cmap = mat->garray; 1825 if (v || idx) { 1826 if (nztot) { 1827 /* Sort by increasing column numbers, assuming A and B already sorted */ 1828 PetscInt imark = -1; 1829 if (v) { 1830 *v = v_p = mat->rowvalues; 1831 for (i=0; i<nzB; i++) { 1832 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1833 else break; 1834 } 1835 imark = i; 1836 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1837 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1838 } 1839 if (idx) { 1840 *idx = idx_p = mat->rowindices; 1841 if (imark > -1) { 1842 for (i=0; i<imark; i++) { 1843 idx_p[i] = cmap[cworkB[i]]; 1844 } 1845 } else { 1846 for (i=0; i<nzB; i++) { 1847 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1848 else break; 1849 } 1850 imark = i; 1851 } 1852 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1853 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1854 } 1855 } else { 1856 if (idx) *idx = 0; 1857 if (v) *v = 0; 1858 } 1859 } 1860 *nz = nztot; 1861 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1862 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1863 PetscFunctionReturn(0); 1864 } 1865 1866 #undef __FUNCT__ 1867 #define __FUNCT__ "MatRestoreRow_MPIAIJ" 1868 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1869 { 1870 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1871 1872 PetscFunctionBegin; 1873 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1874 aij->getrowactive = PETSC_FALSE; 1875 PetscFunctionReturn(0); 1876 } 1877 1878 #undef __FUNCT__ 1879 #define __FUNCT__ "MatNorm_MPIAIJ" 1880 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1881 { 1882 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1883 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1884 PetscErrorCode ierr; 1885 PetscInt i,j,cstart = mat->cmap->rstart; 1886 PetscReal sum = 0.0; 1887 MatScalar *v; 1888 1889 PetscFunctionBegin; 1890 if (aij->size == 1) { 1891 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1892 } else { 1893 if (type == NORM_FROBENIUS) { 1894 v = amat->a; 1895 for (i=0; i<amat->nz; i++) { 1896 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1897 } 1898 v = bmat->a; 1899 for (i=0; i<bmat->nz; i++) { 1900 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1901 } 1902 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1903 *norm = PetscSqrtReal(*norm); 1904 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1905 } else if (type == NORM_1) { /* max column norm */ 1906 PetscReal *tmp,*tmp2; 1907 PetscInt *jj,*garray = aij->garray; 1908 ierr = 
PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1909 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1910 *norm = 0.0; 1911 v = amat->a; jj = amat->j; 1912 for (j=0; j<amat->nz; j++) { 1913 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1914 } 1915 v = bmat->a; jj = bmat->j; 1916 for (j=0; j<bmat->nz; j++) { 1917 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1918 } 1919 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1920 for (j=0; j<mat->cmap->N; j++) { 1921 if (tmp2[j] > *norm) *norm = tmp2[j]; 1922 } 1923 ierr = PetscFree(tmp);CHKERRQ(ierr); 1924 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1925 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1926 } else if (type == NORM_INFINITY) { /* max row norm */ 1927 PetscReal ntemp = 0.0; 1928 for (j=0; j<aij->A->rmap->n; j++) { 1929 v = amat->a + amat->i[j]; 1930 sum = 0.0; 1931 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 1932 sum += PetscAbsScalar(*v); v++; 1933 } 1934 v = bmat->a + bmat->i[j]; 1935 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 1936 sum += PetscAbsScalar(*v); v++; 1937 } 1938 if (sum > ntemp) ntemp = sum; 1939 } 1940 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1941 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1942 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 1943 } 1944 PetscFunctionReturn(0); 1945 } 1946 1947 #undef __FUNCT__ 1948 #define __FUNCT__ "MatTranspose_MPIAIJ" 1949 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 1950 { 1951 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1952 Mat_SeqAIJ *Aloc=(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data; 1953 PetscErrorCode ierr; 1954 PetscInt M = A->rmap->N,N = A->cmap->N,ma,na,mb,nb,*ai,*aj,*bi,*bj,row,*cols,*cols_tmp,i; 1955 PetscInt cstart = A->cmap->rstart,ncol; 1956 Mat B; 1957 MatScalar *array; 1958 1959 PetscFunctionBegin; 1960 if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1961 1962 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 1963 ai = Aloc->i; aj = Aloc->j; 1964 bi = Bloc->i; bj = Bloc->j; 1965 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1966 PetscInt *d_nnz,*g_nnz,*o_nnz; 1967 PetscSFNode *oloc; 1968 PETSC_UNUSED PetscSF sf; 1969 1970 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 1971 /* compute d_nnz for preallocation */ 1972 ierr = PetscMemzero(d_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1973 for (i=0; i<ai[ma]; i++) { 1974 d_nnz[aj[i]]++; 1975 aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1976 } 1977 /* compute local off-diagonal contributions */ 1978 ierr = PetscMemzero(g_nnz,nb*sizeof(PetscInt));CHKERRQ(ierr); 1979 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 1980 /* map those to global */ 1981 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1982 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 1983 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1984 ierr = PetscMemzero(o_nnz,na*sizeof(PetscInt));CHKERRQ(ierr); 1985 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1986 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 1987 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1988 1989 ierr = 
MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 1990 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 1991 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 1992 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 1993 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 1994 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 1995 } else { 1996 B = *matout; 1997 ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 1998 for (i=0; i<ai[ma]; i++) aj[i] += cstart; /* global col index to be used by MatSetValues() */ 1999 } 2000 2001 /* copy over the A part */ 2002 array = Aloc->a; 2003 row = A->rmap->rstart; 2004 for (i=0; i<ma; i++) { 2005 ncol = ai[i+1]-ai[i]; 2006 ierr = MatSetValues(B,ncol,aj,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2007 row++; 2008 array += ncol; aj += ncol; 2009 } 2010 aj = Aloc->j; 2011 for (i=0; i<ai[ma]; i++) aj[i] -= cstart; /* resume local col index */ 2012 2013 /* copy over the B part */ 2014 ierr = PetscCalloc1(bi[mb],&cols);CHKERRQ(ierr); 2015 array = Bloc->a; 2016 row = A->rmap->rstart; 2017 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2018 cols_tmp = cols; 2019 for (i=0; i<mb; i++) { 2020 ncol = bi[i+1]-bi[i]; 2021 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2022 row++; 2023 array += ncol; cols_tmp += ncol; 2024 } 2025 ierr = PetscFree(cols);CHKERRQ(ierr); 2026 2027 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2028 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2029 if (reuse == MAT_INITIAL_MATRIX || *matout != A) { 2030 *matout = B; 2031 } else { 2032 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2033 } 2034 PetscFunctionReturn(0); 2035 } 2036 2037 #undef __FUNCT__ 2038 #define __FUNCT__ "MatDiagonalScale_MPIAIJ" 2039 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2040 { 2041 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2042 Mat a = aij->A,b = aij->B; 2043 PetscErrorCode ierr; 2044 PetscInt s1,s2,s3; 2045 2046 PetscFunctionBegin; 2047 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2048 if (rr) { 2049 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2050 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2051 /* Overlap communication with computation. 
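The scatter of rr into the ghosted work vector lvec is only started here; the left scaling of B and the scaling of the diagonal block A proceed while the messages are in flight, and VecScatterEnd() is called below before the off-diagonal block's columns are scaled by lvec.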
*/ 2052 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2053 } 2054 if (ll) { 2055 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2056 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2057 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2058 } 2059 /* scale the diagonal block */ 2060 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2061 2062 if (rr) { 2063 /* Do a scatter end and then right scale the off-diagonal block */ 2064 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2065 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 #undef __FUNCT__ 2071 #define __FUNCT__ "MatSetUnfactored_MPIAIJ" 2072 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2073 { 2074 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2079 PetscFunctionReturn(0); 2080 } 2081 2082 #undef __FUNCT__ 2083 #define __FUNCT__ "MatEqual_MPIAIJ" 2084 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2085 { 2086 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2087 Mat a,b,c,d; 2088 PetscBool flg; 2089 PetscErrorCode ierr; 2090 2091 PetscFunctionBegin; 2092 a = matA->A; b = matA->B; 2093 c = matB->A; d = matB->B; 2094 2095 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2096 if (flg) { 2097 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2098 } 2099 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatCopy_MPIAIJ" 2105 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2106 { 2107 PetscErrorCode ierr; 2108 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2109 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2110 2111 PetscFunctionBegin; 2112 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2113 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2114 /* because of the column compression in the off-processor part of the matrix a->B, 2115 the number of columns in a->B and b->B may be different, hence we cannot call 2116 the MatCopy() directly on the two parts. If need be, we can provide a more 2117 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2118 then copying the submatrices */ 2119 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2120 } else { 2121 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2122 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2123 } 2124 PetscFunctionReturn(0); 2125 } 2126 2127 #undef __FUNCT__ 2128 #define __FUNCT__ "MatSetUp_MPIAIJ" 2129 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2130 { 2131 PetscErrorCode ierr; 2132 2133 PetscFunctionBegin; 2134 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2135 PetscFunctionReturn(0); 2136 } 2137 2138 /* 2139 Computes the number of nonzeros per row needed for preallocation when X and Y 2140 have different nonzero structure. 
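Each row's count is the size of the union of the two sorted (global) column lists. For example, if row i of X has global columns {0,3,7} and the same row of Y has {3,5}, the merged pattern is {0,3,5,7} and nnz[i] = 4.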
2141 */ 2142 #undef __FUNCT__ 2143 #define __FUNCT__ "MatAXPYGetPreallocation_MPIX_private" 2144 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2145 { 2146 PetscInt i,j,k,nzx,nzy; 2147 2148 PetscFunctionBegin; 2149 /* Set the number of nonzeros in the new matrix */ 2150 for (i=0; i<m; i++) { 2151 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2152 nzx = xi[i+1] - xi[i]; 2153 nzy = yi[i+1] - yi[i]; 2154 nnz[i] = 0; 2155 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2156 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2157 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2158 nnz[i]++; 2159 } 2160 for (; k<nzy; k++) nnz[i]++; 2161 } 2162 PetscFunctionReturn(0); 2163 } 2164 2165 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2166 #undef __FUNCT__ 2167 #define __FUNCT__ "MatAXPYGetPreallocation_MPIAIJ" 2168 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2169 { 2170 PetscErrorCode ierr; 2171 PetscInt m = Y->rmap->N; 2172 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2173 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2174 2175 PetscFunctionBegin; 2176 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2177 PetscFunctionReturn(0); 2178 } 2179 2180 #undef __FUNCT__ 2181 #define __FUNCT__ "MatAXPY_MPIAIJ" 2182 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2183 { 2184 PetscErrorCode ierr; 2185 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2186 PetscBLASInt bnz,one=1; 2187 Mat_SeqAIJ *x,*y; 2188 2189 PetscFunctionBegin; 2190 if (str == SAME_NONZERO_PATTERN) { 2191 PetscScalar alpha = a; 2192 x = (Mat_SeqAIJ*)xx->A->data; 2193 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2194 y = (Mat_SeqAIJ*)yy->A->data; 2195 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2196 x = (Mat_SeqAIJ*)xx->B->data; 2197 y = (Mat_SeqAIJ*)yy->B->data; 2198 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2199 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2200 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2201 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2202 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2203 } else { 2204 Mat B; 2205 PetscInt *nnz_d,*nnz_o; 2206 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2207 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2208 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2209 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2210 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2211 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2212 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2213 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2214 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2215 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2216 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2217 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2218 ierr = PetscFree(nnz_d);CHKERRQ(ierr); 2219 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2220 } 2221 
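  /* In the default branch above, MatHeaderReplace() has made Y take over B's data structures, so Y now carries the union of the two nonzero patterns */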
PetscFunctionReturn(0); 2222 } 2223 2224 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2225 2226 #undef __FUNCT__ 2227 #define __FUNCT__ "MatConjugate_MPIAIJ" 2228 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2229 { 2230 #if defined(PETSC_USE_COMPLEX) 2231 PetscErrorCode ierr; 2232 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2233 2234 PetscFunctionBegin; 2235 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2236 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2237 #else 2238 PetscFunctionBegin; 2239 #endif 2240 PetscFunctionReturn(0); 2241 } 2242 2243 #undef __FUNCT__ 2244 #define __FUNCT__ "MatRealPart_MPIAIJ" 2245 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2246 { 2247 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2248 PetscErrorCode ierr; 2249 2250 PetscFunctionBegin; 2251 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2252 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2253 PetscFunctionReturn(0); 2254 } 2255 2256 #undef __FUNCT__ 2257 #define __FUNCT__ "MatImaginaryPart_MPIAIJ" 2258 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2259 { 2260 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2265 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 #undef __FUNCT__ 2270 #define __FUNCT__ "MatGetRowMaxAbs_MPIAIJ" 2271 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2272 { 2273 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2274 PetscErrorCode ierr; 2275 PetscInt i,*idxb = 0; 2276 PetscScalar *va,*vb; 2277 Vec vtmp; 2278 2279 PetscFunctionBegin; 2280 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2281 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2282 if (idx) { 2283 for (i=0; i<A->rmap->n; i++) { 2284 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2285 } 2286 } 2287 2288 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2289 if (idx) { 2290 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2291 } 2292 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2293 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2294 2295 for (i=0; i<A->rmap->n; i++) { 2296 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2297 va[i] = vb[i]; 2298 if (idx) idx[i] = a->garray[idxb[i]]; 2299 } 2300 } 2301 2302 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2303 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2304 ierr = PetscFree(idxb);CHKERRQ(ierr); 2305 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 #undef __FUNCT__ 2310 #define __FUNCT__ "MatGetRowMinAbs_MPIAIJ" 2311 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2312 { 2313 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2314 PetscErrorCode ierr; 2315 PetscInt i,*idxb = 0; 2316 PetscScalar *va,*vb; 2317 Vec vtmp; 2318 2319 PetscFunctionBegin; 2320 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2321 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2322 if (idx) { 2323 for (i=0; i<A->cmap->n; i++) { 2324 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2325 } 2326 } 2327 2328 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2329 if (idx) { 2330 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2331 } 2332 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2333 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2334 2335 for (i=0; i<A->rmap->n; i++) { 2336 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2337 va[i] = vb[i]; 2338 if (idx) idx[i] = a->garray[idxb[i]]; 2339 } 2340 } 2341 2342 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2343 ierr = 
VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2344 ierr = PetscFree(idxb);CHKERRQ(ierr); 2345 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2346 PetscFunctionReturn(0); 2347 } 2348 2349 #undef __FUNCT__ 2350 #define __FUNCT__ "MatGetRowMin_MPIAIJ" 2351 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2352 { 2353 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2354 PetscInt n = A->rmap->n; 2355 PetscInt cstart = A->cmap->rstart; 2356 PetscInt *cmap = mat->garray; 2357 PetscInt *diagIdx, *offdiagIdx; 2358 Vec diagV, offdiagV; 2359 PetscScalar *a, *diagA, *offdiagA; 2360 PetscInt r; 2361 PetscErrorCode ierr; 2362 2363 PetscFunctionBegin; 2364 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2365 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &diagV);CHKERRQ(ierr); 2366 ierr = VecCreateSeq(PetscObjectComm((PetscObject)A), n, &offdiagV);CHKERRQ(ierr); 2367 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2368 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2369 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2370 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2371 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2372 for (r = 0; r < n; ++r) { 2373 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2374 a[r] = diagA[r]; 2375 idx[r] = cstart + diagIdx[r]; 2376 } else { 2377 a[r] = offdiagA[r]; 2378 idx[r] = cmap[offdiagIdx[r]]; 2379 } 2380 } 2381 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2382 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2383 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2384 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2385 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2386 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 #undef __FUNCT__ 2391 #define __FUNCT__ "MatGetRowMax_MPIAIJ" 2392 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2393 { 2394 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2395 PetscInt n = A->rmap->n; 2396 PetscInt cstart = A->cmap->rstart; 2397 PetscInt *cmap = mat->garray; 2398 PetscInt *diagIdx, *offdiagIdx; 2399 Vec diagV, offdiagV; 2400 PetscScalar *a, *diagA, *offdiagA; 2401 PetscInt r; 2402 PetscErrorCode ierr; 2403 2404 PetscFunctionBegin; 2405 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2406 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2407 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2408 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2409 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2410 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2411 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2412 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2413 for (r = 0; r < n; ++r) { 2414 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2415 a[r] = diagA[r]; 2416 idx[r] = cstart + diagIdx[r]; 2417 } else { 2418 a[r] = offdiagA[r]; 2419 idx[r] = cmap[offdiagIdx[r]]; 2420 } 2421 } 2422 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2423 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2424 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2425 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2426 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2427 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIAIJ" 2433 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2434 { 2435 PetscErrorCode ierr; 
2436 Mat *dummy; 2437 2438 PetscFunctionBegin; 2439 ierr = MatGetSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2440 *newmat = *dummy; 2441 ierr = PetscFree(dummy);CHKERRQ(ierr); 2442 PetscFunctionReturn(0); 2443 } 2444 2445 #undef __FUNCT__ 2446 #define __FUNCT__ "MatInvertBlockDiagonal_MPIAIJ" 2447 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2448 { 2449 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2450 PetscErrorCode ierr; 2451 2452 PetscFunctionBegin; 2453 ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2454 A->factorerrortype = a->A->factorerrortype; 2455 PetscFunctionReturn(0); 2456 } 2457 2458 #undef __FUNCT__ 2459 #define __FUNCT__ "MatSetRandom_MPIAIJ" 2460 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2461 { 2462 PetscErrorCode ierr; 2463 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2464 2465 PetscFunctionBegin; 2466 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2467 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2468 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2469 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ" 2475 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2476 { 2477 PetscFunctionBegin; 2478 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2479 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2480 PetscFunctionReturn(0); 2481 } 2482 2483 #undef __FUNCT__ 2484 #define __FUNCT__ "MatMPIAIJSetUseScalableIncreaseOverlap" 2485 /*@ 2486 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2487 2488 Collective on Mat 2489 2490 Input Parameters: 2491 + A - the matrix 2492 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2493 2494 Level: advanced 2495 2496 @*/ 2497 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2498 { 2499 PetscErrorCode ierr; 2500 2501 PetscFunctionBegin; 2502 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 #undef __FUNCT__ 2507 #define __FUNCT__ "MatSetFromOptions_MPIAIJ" 2508 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2509 { 2510 PetscErrorCode ierr; 2511 PetscBool sc = PETSC_FALSE,flg; 2512 2513 PetscFunctionBegin; 2514 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2515 ierr = PetscObjectOptionsBegin((PetscObject)A); 2516 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2517 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2518 if (flg) { 2519 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2520 } 2521 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2522 PetscFunctionReturn(0); 2523 } 2524 2525 #undef __FUNCT__ 2526 #define __FUNCT__ "MatShift_MPIAIJ" 2527 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2528 { 2529 PetscErrorCode ierr; 2530 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2531 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2532 2533 PetscFunctionBegin; 2534 if (!Y->preallocated) { 2535 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2536 } else if 
(!aij->nz) { 2537 PetscInt nonew = aij->nonew; 2538 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2539 aij->nonew = nonew; 2540 } 2541 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2542 PetscFunctionReturn(0); 2543 } 2544 2545 #undef __FUNCT__ 2546 #define __FUNCT__ "MatMissingDiagonal_MPIAIJ" 2547 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2548 { 2549 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2550 PetscErrorCode ierr; 2551 2552 PetscFunctionBegin; 2553 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2554 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2555 if (d) { 2556 PetscInt rstart; 2557 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2558 *d += rstart; 2559 2560 } 2561 PetscFunctionReturn(0); 2562 } 2563 2564 2565 /* -------------------------------------------------------------------*/ 2566 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2567 MatGetRow_MPIAIJ, 2568 MatRestoreRow_MPIAIJ, 2569 MatMult_MPIAIJ, 2570 /* 4*/ MatMultAdd_MPIAIJ, 2571 MatMultTranspose_MPIAIJ, 2572 MatMultTransposeAdd_MPIAIJ, 2573 0, 2574 0, 2575 0, 2576 /*10*/ 0, 2577 0, 2578 0, 2579 MatSOR_MPIAIJ, 2580 MatTranspose_MPIAIJ, 2581 /*15*/ MatGetInfo_MPIAIJ, 2582 MatEqual_MPIAIJ, 2583 MatGetDiagonal_MPIAIJ, 2584 MatDiagonalScale_MPIAIJ, 2585 MatNorm_MPIAIJ, 2586 /*20*/ MatAssemblyBegin_MPIAIJ, 2587 MatAssemblyEnd_MPIAIJ, 2588 MatSetOption_MPIAIJ, 2589 MatZeroEntries_MPIAIJ, 2590 /*24*/ MatZeroRows_MPIAIJ, 2591 0, 2592 0, 2593 0, 2594 0, 2595 /*29*/ MatSetUp_MPIAIJ, 2596 0, 2597 0, 2598 MatGetDiagonalBlock_MPIAIJ, 2599 0, 2600 /*34*/ MatDuplicate_MPIAIJ, 2601 0, 2602 0, 2603 0, 2604 0, 2605 /*39*/ MatAXPY_MPIAIJ, 2606 MatGetSubMatrices_MPIAIJ, 2607 MatIncreaseOverlap_MPIAIJ, 2608 MatGetValues_MPIAIJ, 2609 MatCopy_MPIAIJ, 2610 /*44*/ MatGetRowMax_MPIAIJ, 2611 MatScale_MPIAIJ, 2612 MatShift_MPIAIJ, 2613 MatDiagonalSet_MPIAIJ, 2614 MatZeroRowsColumns_MPIAIJ, 2615 /*49*/ MatSetRandom_MPIAIJ, 2616 0, 2617 0, 2618 0, 2619 0, 2620 /*54*/ MatFDColoringCreate_MPIXAIJ, 2621 0, 2622 MatSetUnfactored_MPIAIJ, 2623 MatPermute_MPIAIJ, 2624 0, 2625 /*59*/ MatGetSubMatrix_MPIAIJ, 2626 MatDestroy_MPIAIJ, 2627 MatView_MPIAIJ, 2628 0, 2629 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2630 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2631 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2632 0, 2633 0, 2634 0, 2635 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2636 MatGetRowMinAbs_MPIAIJ, 2637 0, 2638 0, 2639 0, 2640 0, 2641 /*75*/ MatFDColoringApply_AIJ, 2642 MatSetFromOptions_MPIAIJ, 2643 0, 2644 0, 2645 MatFindZeroDiagonals_MPIAIJ, 2646 /*80*/ 0, 2647 0, 2648 0, 2649 /*83*/ MatLoad_MPIAIJ, 2650 0, 2651 0, 2652 0, 2653 0, 2654 0, 2655 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2656 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2657 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2658 MatPtAP_MPIAIJ_MPIAIJ, 2659 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2660 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /*99*/ 0, 2666 0, 2667 0, 2668 MatConjugate_MPIAIJ, 2669 0, 2670 /*104*/MatSetValuesRow_MPIAIJ, 2671 MatRealPart_MPIAIJ, 2672 MatImaginaryPart_MPIAIJ, 2673 0, 2674 0, 2675 /*109*/0, 2676 0, 2677 MatGetRowMin_MPIAIJ, 2678 0, 2679 MatMissingDiagonal_MPIAIJ, 2680 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2681 0, 2682 MatGetGhosts_MPIAIJ, 2683 0, 2684 0, 2685 /*119*/0, 2686 0, 2687 0, 2688 0, 2689 MatGetMultiProcBlock_MPIAIJ, 2690 /*124*/MatFindNonzeroRows_MPIAIJ, 2691 MatGetColumnNorms_MPIAIJ, 2692 MatInvertBlockDiagonal_MPIAIJ, 2693 0, 2694 
MatGetSubMatricesMPI_MPIAIJ, 2695 /*129*/0, 2696 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2697 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2698 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2699 0, 2700 /*134*/0, 2701 0, 2702 0, 2703 0, 2704 0, 2705 /*139*/0, 2706 0, 2707 0, 2708 MatFDColoringSetUp_MPIXAIJ, 2709 MatFindOffBlockDiagonalEntries_MPIAIJ, 2710 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2711 }; 2712 2713 /* ----------------------------------------------------------------------------------------*/ 2714 2715 #undef __FUNCT__ 2716 #define __FUNCT__ "MatStoreValues_MPIAIJ" 2717 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2718 { 2719 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2720 PetscErrorCode ierr; 2721 2722 PetscFunctionBegin; 2723 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2724 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2725 PetscFunctionReturn(0); 2726 } 2727 2728 #undef __FUNCT__ 2729 #define __FUNCT__ "MatRetrieveValues_MPIAIJ" 2730 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2731 { 2732 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2733 PetscErrorCode ierr; 2734 2735 PetscFunctionBegin; 2736 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2737 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 #undef __FUNCT__ 2742 #define __FUNCT__ "MatMPIAIJSetPreallocation_MPIAIJ" 2743 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2744 { 2745 Mat_MPIAIJ *b; 2746 PetscErrorCode ierr; 2747 2748 PetscFunctionBegin; 2749 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2750 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2751 b = (Mat_MPIAIJ*)B->data; 2752 2753 #if defined(PETSC_USE_CTABLE) 2754 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2755 #else 2756 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2757 #endif 2758 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2759 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2760 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2761 2762 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2763 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2764 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2765 ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr); 2766 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2767 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2768 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2769 2770 if (!B->preallocated) { 2771 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2772 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2773 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2774 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2775 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2776 } 2777 2778 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2779 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2780 B->preallocated = PETSC_TRUE; 2781 B->was_assembled = PETSC_FALSE; 2782 B->assembled = PETSC_FALSE;; 2783 PetscFunctionReturn(0); 2784 } 2785 2786 #undef __FUNCT__ 2787 #define __FUNCT__ "MatDuplicate_MPIAIJ" 2788 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2789 { 2790 Mat mat; 2791 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2792 PetscErrorCode ierr; 2793 2794 PetscFunctionBegin; 2795 *newmat = 0; 2796 ierr = 
MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2797 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2798 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2799 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2800 ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 2801 a = (Mat_MPIAIJ*)mat->data; 2802 2803 mat->factortype = matin->factortype; 2804 mat->assembled = PETSC_TRUE; 2805 mat->insertmode = NOT_SET_VALUES; 2806 mat->preallocated = PETSC_TRUE; 2807 2808 a->size = oldmat->size; 2809 a->rank = oldmat->rank; 2810 a->donotstash = oldmat->donotstash; 2811 a->roworiented = oldmat->roworiented; 2812 a->rowindices = 0; 2813 a->rowvalues = 0; 2814 a->getrowactive = PETSC_FALSE; 2815 2816 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2817 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2818 2819 if (oldmat->colmap) { 2820 #if defined(PETSC_USE_CTABLE) 2821 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2822 #else 2823 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2824 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2825 ierr = PetscMemcpy(a->colmap,oldmat->colmap,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2826 #endif 2827 } else a->colmap = 0; 2828 if (oldmat->garray) { 2829 PetscInt len; 2830 len = oldmat->B->cmap->n; 2831 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2832 ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2833 if (len) { ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 2834 } else a->garray = 0; 2835 2836 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2837 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2838 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2839 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2840 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2841 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2842 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2843 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2844 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2845 *newmat = mat; 2846 PetscFunctionReturn(0); 2847 } 2848 2849 2850 2851 #undef __FUNCT__ 2852 #define __FUNCT__ "MatLoad_MPIAIJ" 2853 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2854 { 2855 PetscScalar *vals,*svals; 2856 MPI_Comm comm; 2857 PetscErrorCode ierr; 2858 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2859 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2860 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2861 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2862 PetscInt cend,cstart,n,*rowners; 2863 int fd; 2864 PetscInt bs = newMat->rmap->bs; 2865 2866 PetscFunctionBegin; 2867 /* force binary viewer to load .info file if it has not yet done so */ 2868 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2869 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2870 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2871 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2872 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2873 if (!rank) { 2874 ierr = 
PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr); 2875 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2876 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 2877 } 2878 2879 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2880 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2881 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2882 if (bs < 0) bs = 1; 2883 2884 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2885 M = header[1]; N = header[2]; 2886 2887 /* If global sizes are set, check if they are consistent with that given in the file */ 2888 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2889 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2890 2891 /* determine ownership of all (block) rows */ 2892 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2893 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2894 else m = newMat->rmap->n; /* Set by user */ 2895 2896 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2897 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2898 2899 /* First process needs enough room for process with most rows */ 2900 if (!rank) { 2901 mmax = rowners[1]; 2902 for (i=2; i<=size; i++) { 2903 mmax = PetscMax(mmax, rowners[i]); 2904 } 2905 } else mmax = -1; /* unused, but compilers complain */ 2906 2907 rowners[0] = 0; 2908 for (i=2; i<=size; i++) { 2909 rowners[i] += rowners[i-1]; 2910 } 2911 rstart = rowners[rank]; 2912 rend = rowners[rank+1]; 2913 2914 /* distribute row lengths to all processors */ 2915 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 2916 if (!rank) { 2917 ierr = PetscBinaryRead(fd,ourlens,m,PETSC_INT);CHKERRQ(ierr); 2918 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 2919 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 2920 for (j=0; j<m; j++) { 2921 procsnz[0] += ourlens[j]; 2922 } 2923 for (i=1; i<size; i++) { 2924 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],PETSC_INT);CHKERRQ(ierr); 2925 /* calculate the number of nonzeros on each processor */ 2926 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 2927 procsnz[i] += rowlengths[j]; 2928 } 2929 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2930 } 2931 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 2932 } else { 2933 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2934 } 2935 2936 if (!rank) { 2937 /* determine max buffer needed and allocate it */ 2938 maxnz = 0; 2939 for (i=0; i<size; i++) { 2940 maxnz = PetscMax(maxnz,procsnz[i]); 2941 } 2942 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 2943 2944 /* read in my part of the matrix column indices */ 2945 nz = procsnz[0]; 2946 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2947 ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr); 2948 2949 /* read in every one elses and ship off */ 2950 for (i=1; i<size; i++) { 
2951 nz = procsnz[i]; 2952 ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr); 2953 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 2954 } 2955 ierr = PetscFree(cols);CHKERRQ(ierr); 2956 } else { 2957 /* determine buffer space needed for message */ 2958 nz = 0; 2959 for (i=0; i<m; i++) { 2960 nz += ourlens[i]; 2961 } 2962 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 2963 2964 /* receive message of column indices*/ 2965 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 2966 } 2967 2968 /* determine column ownership if matrix is not square */ 2969 if (N != M) { 2970 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 2971 else n = newMat->cmap->n; 2972 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 2973 cstart = cend - n; 2974 } else { 2975 cstart = rstart; 2976 cend = rend; 2977 n = cend - cstart; 2978 } 2979 2980 /* loop over local rows, determining number of off diagonal entries */ 2981 ierr = PetscMemzero(offlens,m*sizeof(PetscInt));CHKERRQ(ierr); 2982 jj = 0; 2983 for (i=0; i<m; i++) { 2984 for (j=0; j<ourlens[i]; j++) { 2985 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 2986 jj++; 2987 } 2988 } 2989 2990 for (i=0; i<m; i++) { 2991 ourlens[i] -= offlens[i]; 2992 } 2993 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 2994 2995 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 2996 2997 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 2998 2999 for (i=0; i<m; i++) { 3000 ourlens[i] += offlens[i]; 3001 } 3002 3003 if (!rank) { 3004 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3005 3006 /* read in my part of the matrix numerical values */ 3007 nz = procsnz[0]; 3008 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3009 3010 /* insert into matrix */ 3011 jj = rstart; 3012 smycols = mycols; 3013 svals = vals; 3014 for (i=0; i<m; i++) { 3015 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3016 smycols += ourlens[i]; 3017 svals += ourlens[i]; 3018 jj++; 3019 } 3020 3021 /* read in other processors and ship out */ 3022 for (i=1; i<size; i++) { 3023 nz = procsnz[i]; 3024 ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr); 3025 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3026 } 3027 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3028 } else { 3029 /* receive numeric values */ 3030 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3031 3032 /* receive message of values*/ 3033 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3034 3035 /* insert into matrix */ 3036 jj = rstart; 3037 smycols = mycols; 3038 svals = vals; 3039 for (i=0; i<m; i++) { 3040 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3041 smycols += ourlens[i]; 3042 svals += ourlens[i]; 3043 jj++; 3044 } 3045 } 3046 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3047 ierr = PetscFree(vals);CHKERRQ(ierr); 3048 ierr = PetscFree(mycols);CHKERRQ(ierr); 3049 ierr = PetscFree(rowners);CHKERRQ(ierr); 3050 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3052 PetscFunctionReturn(0); 3053 } 3054 3055 #undef __FUNCT__ 3056 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ" 3057 /* TODO: Not scalable because of ISAllGather() unless getting all columns. 
*/ 3058 PetscErrorCode MatGetSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3059 { 3060 PetscErrorCode ierr; 3061 IS iscol_local; 3062 PetscInt csize; 3063 3064 PetscFunctionBegin; 3065 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3066 if (call == MAT_REUSE_MATRIX) { 3067 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3068 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3069 } else { 3070 /* check if we are grabbing all columns*/ 3071 PetscBool isstride; 3072 PetscMPIInt lisstride = 0,gisstride; 3073 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3074 if (isstride) { 3075 PetscInt start,len,mstart,mlen; 3076 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3077 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3078 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3079 if (mstart == start && mlen-mstart == len) lisstride = 1; 3080 } 3081 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3082 if (gisstride) { 3083 PetscInt N; 3084 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3085 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3086 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3087 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3088 } else { 3089 PetscInt cbs; 3090 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3091 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3092 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3093 } 3094 } 3095 ierr = MatGetSubMatrix_MPIAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3096 if (call == MAT_INITIAL_MATRIX) { 3097 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3098 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3099 } 3100 PetscFunctionReturn(0); 3101 } 3102 3103 extern PetscErrorCode MatGetSubMatrices_MPIAIJ_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,Mat*); 3104 #undef __FUNCT__ 3105 #define __FUNCT__ "MatGetSubMatrix_MPIAIJ_Private" 3106 /* 3107 Not great since it makes two copies of the submatrix, first an SeqAIJ 3108 in local and then by concatenating the local matrices the end result. 3109 Writing it directly would be much like MatGetSubMatrices_MPIAIJ() 3110 3111 Note: This requires a sequential iscol with all indices. 
3112 */ 3113 PetscErrorCode MatGetSubMatrix_MPIAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3114 { 3115 PetscErrorCode ierr; 3116 PetscMPIInt rank,size; 3117 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3118 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol; 3119 PetscBool allcolumns, colflag; 3120 Mat M,Mreuse; 3121 MatScalar *vwork,*aa; 3122 MPI_Comm comm; 3123 Mat_SeqAIJ *aij; 3124 3125 PetscFunctionBegin; 3126 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3127 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3128 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3129 3130 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3131 ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr); 3132 if (colflag && ncol == mat->cmap->N) { 3133 allcolumns = PETSC_TRUE; 3134 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix\n");CHKERRQ(ierr); 3135 } else { 3136 allcolumns = PETSC_FALSE; 3137 } 3138 if (call == MAT_REUSE_MATRIX) { 3139 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3140 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3141 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3142 } else { 3143 ierr = MatGetSubMatrices_MPIAIJ_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&allcolumns,&Mreuse);CHKERRQ(ierr); 3144 } 3145 3146 /* 3147 m - number of local rows 3148 n - number of columns (same on all processors) 3149 rstart - first row in new global matrix generated 3150 */ 3151 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3152 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3153 if (call == MAT_INITIAL_MATRIX) { 3154 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3155 ii = aij->i; 3156 jj = aij->j; 3157 3158 /* 3159 Determine the number of non-zeros in the diagonal and off-diagonal 3160 portions of the matrix in order to do correct preallocation 3161 */ 3162 3163 /* first get start and end of "diagonal" columns */ 3164 if (csize == PETSC_DECIDE) { 3165 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3166 if (mglobal == n) { /* square matrix */ 3167 nlocal = m; 3168 } else { 3169 nlocal = n/size + ((n % size) > rank); 3170 } 3171 } else { 3172 nlocal = csize; 3173 } 3174 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3175 rstart = rend - nlocal; 3176 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3177 3178 /* next, compute all the lengths */ 3179 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3180 olens = dlens + m; 3181 for (i=0; i<m; i++) { 3182 jend = ii[i+1] - ii[i]; 3183 olen = 0; 3184 dlen = 0; 3185 for (j=0; j<jend; j++) { 3186 if (*jj < rstart || *jj >= rend) olen++; 3187 else dlen++; 3188 jj++; 3189 } 3190 olens[i] = olen; 3191 dlens[i] = dlen; 3192 } 3193 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3194 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3195 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3196 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3197 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3198 ierr = PetscFree(dlens);CHKERRQ(ierr); 3199 } else { 3200 PetscInt ml,nl; 3201 3202 M = *newmat; 3203 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3204 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same 
size/layout as request"); 3205 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3206 /* 3207 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3208 rather than the slower MatSetValues(). 3209 */ 3210 M->was_assembled = PETSC_TRUE; 3211 M->assembled = PETSC_FALSE; 3212 } 3213 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3214 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3215 ii = aij->i; 3216 jj = aij->j; 3217 aa = aij->a; 3218 for (i=0; i<m; i++) { 3219 row = rstart + i; 3220 nz = ii[i+1] - ii[i]; 3221 cwork = jj; jj += nz; 3222 vwork = aa; aa += nz; 3223 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3224 } 3225 3226 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3227 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3228 *newmat = M; 3229 3230 /* save submatrix used in processor for next request */ 3231 if (call == MAT_INITIAL_MATRIX) { 3232 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3233 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3234 } 3235 PetscFunctionReturn(0); 3236 } 3237 3238 #undef __FUNCT__ 3239 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR_MPIAIJ" 3240 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3241 { 3242 PetscInt m,cstart, cend,j,nnz,i,d; 3243 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3244 const PetscInt *JJ; 3245 PetscScalar *values; 3246 PetscErrorCode ierr; 3247 3248 PetscFunctionBegin; 3249 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3250 3251 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3252 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3253 m = B->rmap->n; 3254 cstart = B->cmap->rstart; 3255 cend = B->cmap->rend; 3256 rstart = B->rmap->rstart; 3257 3258 ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3259 3260 #if defined(PETSC_USE_DEBUG) 3261 for (i=0; i<m; i++) { 3262 nnz = Ii[i+1]- Ii[i]; 3263 JJ = J + Ii[i]; 3264 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3265 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]); 3266 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3267 } 3268 #endif 3269 3270 for (i=0; i<m; i++) { 3271 nnz = Ii[i+1]- Ii[i]; 3272 JJ = J + Ii[i]; 3273 nnz_max = PetscMax(nnz_max,nnz); 3274 d = 0; 3275 for (j=0; j<nnz; j++) { 3276 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3277 } 3278 d_nnz[i] = d; 3279 o_nnz[i] = nnz - d; 3280 } 3281 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3282 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3283 3284 if (v) values = (PetscScalar*)v; 3285 else { 3286 ierr = PetscCalloc1(nnz_max+1,&values);CHKERRQ(ierr); 3287 } 3288 3289 for (i=0; i<m; i++) { 3290 ii = i + rstart; 3291 nnz = Ii[i+1]- Ii[i]; 3292 ierr = MatSetValues_MPIAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ?
Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); 3293 } 3294 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3295 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3296 3297 if (!v) { 3298 ierr = PetscFree(values);CHKERRQ(ierr); 3299 } 3300 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3301 PetscFunctionReturn(0); 3302 } 3303 3304 #undef __FUNCT__ 3305 #define __FUNCT__ "MatMPIAIJSetPreallocationCSR" 3306 /*@ 3307 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3308 (the default parallel PETSc format). 3309 3310 Collective on MPI_Comm 3311 3312 Input Parameters: 3313 + B - the matrix 3314 . i - the indices into j for the start of each local row (starts with zero) 3315 . j - the column indices for each local row (starts with zero) 3316 - v - optional values in the matrix 3317 3318 Level: developer 3319 3320 Notes: 3321 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3322 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3323 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3324 3325 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3326 3327 The format which is used for the sparse matrix input, is equivalent to a 3328 row-major ordering.. i.e for the following matrix, the input data expected is 3329 as shown 3330 3331 $ 1 0 0 3332 $ 2 0 3 P0 3333 $ ------- 3334 $ 4 5 6 P1 3335 $ 3336 $ Process0 [P0]: rows_owned=[0,1] 3337 $ i = {0,1,3} [size = nrow+1 = 2+1] 3338 $ j = {0,0,2} [size = 3] 3339 $ v = {1,2,3} [size = 3] 3340 $ 3341 $ Process1 [P1]: rows_owned=[2] 3342 $ i = {0,3} [size = nrow+1 = 1+1] 3343 $ j = {0,1,2} [size = 3] 3344 $ v = {4,5,6} [size = 3] 3345 3346 .keywords: matrix, aij, compressed row, sparse, parallel 3347 3348 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 3349 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 3350 @*/ 3351 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3352 { 3353 PetscErrorCode ierr; 3354 3355 PetscFunctionBegin; 3356 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 3357 PetscFunctionReturn(0); 3358 } 3359 3360 #undef __FUNCT__ 3361 #define __FUNCT__ "MatMPIAIJSetPreallocation" 3362 /*@C 3363 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 3364 (the default parallel PETSc format). For good matrix assembly performance 3365 the user should preallocate the matrix storage by setting the parameters 3366 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3367 performance can be increased by more than a factor of 50. 3368 3369 Collective on MPI_Comm 3370 3371 Input Parameters: 3372 + B - the matrix 3373 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3374 (same value is used for all local rows) 3375 . d_nnz - array containing the number of nonzeros in the various rows of the 3376 DIAGONAL portion of the local submatrix (possibly different for each row) 3377 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 3378 The size of this array is equal to the number of local rows, i.e 'm'. 
3379 For matrices that will be factored, you must leave room for (and set) 3380 the diagonal entry even if it is zero. 3381 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3382 submatrix (same value is used for all local rows). 3383 - o_nnz - array containing the number of nonzeros in the various rows of the 3384 OFF-DIAGONAL portion of the local submatrix (possibly different for 3385 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 3386 structure. The size of this array is equal to the number 3387 of local rows, i.e 'm'. 3388 3389 If the *_nnz parameter is given then the *_nz parameter is ignored 3390 3391 The AIJ format (also called the Yale sparse matrix format or 3392 compressed row storage (CSR)), is fully compatible with standard Fortran 77 3393 storage. The stored row and column indices begin with zero. 3394 See Users-Manual: ch_mat for details. 3395 3396 The parallel matrix is partitioned such that the first m0 rows belong to 3397 process 0, the next m1 rows belong to process 1, the next m2 rows belong 3398 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 3399 3400 The DIAGONAL portion of the local submatrix of a processor can be defined 3401 as the submatrix which is obtained by extraction the part corresponding to 3402 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 3403 first row that belongs to the processor, r2 is the last row belonging to 3404 the this processor, and c1-c2 is range of indices of the local part of a 3405 vector suitable for applying the matrix to. This is an mxn matrix. In the 3406 common case of a square matrix, the row and column ranges are the same and 3407 the DIAGONAL part is also square. The remaining portion of the local 3408 submatrix (mxN) constitute the OFF-DIAGONAL portion. 3409 3410 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3411 3412 You can call MatGetInfo() to get information on how effective the preallocation was; 3413 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3414 You can also run with the option -info and look for messages with the string 3415 malloc in them to see if additional memory allocation was needed. 3416 3417 Example usage: 3418 3419 Consider the following 8x8 matrix with 34 non-zero values, that is 3420 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3421 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 3422 as follows: 3423 3424 .vb 3425 1 2 0 | 0 3 0 | 0 4 3426 Proc0 0 5 6 | 7 0 0 | 8 0 3427 9 0 10 | 11 0 0 | 12 0 3428 ------------------------------------- 3429 13 0 14 | 15 16 17 | 0 0 3430 Proc1 0 18 0 | 19 20 21 | 0 0 3431 0 0 0 | 22 23 0 | 24 0 3432 ------------------------------------- 3433 Proc2 25 26 27 | 0 0 28 | 29 0 3434 30 0 0 | 31 32 33 | 0 34 3435 .ve 3436 3437 This can be represented as a collection of submatrices as: 3438 3439 .vb 3440 A B C 3441 D E F 3442 G H I 3443 .ve 3444 3445 Where the submatrices A,B,C are owned by proc0, D,E,F are 3446 owned by proc1, G,H,I are owned by proc2. 3447 3448 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3449 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3450 The 'M','N' parameters are 8,8, and have the same values on all procs. 3451 3452 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3453 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3454 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 
3455 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 3456 part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ 3457 matrix, ans [DF] as another SeqAIJ matrix. 3458 3459 When d_nz, o_nz parameters are specified, d_nz storage elements are 3460 allocated for every row of the local diagonal submatrix, and o_nz 3461 storage locations are allocated for every row of the OFF-DIAGONAL submat. 3462 One way to choose d_nz and o_nz is to use the max nonzerors per local 3463 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 3464 In this case, the values of d_nz,o_nz are: 3465 .vb 3466 proc0 : dnz = 2, o_nz = 2 3467 proc1 : dnz = 3, o_nz = 2 3468 proc2 : dnz = 1, o_nz = 4 3469 .ve 3470 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3471 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3472 for proc3. i.e we are using 12+15+10=37 storage locations to store 3473 34 values. 3474 3475 When d_nnz, o_nnz parameters are specified, the storage is specified 3476 for every row, coresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 3477 In the above case the values for d_nnz,o_nnz are: 3478 .vb 3479 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3480 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3481 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3482 .ve 3483 Here the space allocated is sum of all the above values i.e 34, and 3484 hence pre-allocation is perfect. 3485 3486 Level: intermediate 3487 3488 .keywords: matrix, aij, compressed row, sparse, parallel 3489 3490 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(), 3491 MATMPIAIJ, MatGetInfo(), PetscSplitOwnership() 3492 @*/ 3493 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 3494 { 3495 PetscErrorCode ierr; 3496 3497 PetscFunctionBegin; 3498 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3499 PetscValidType(B,1); 3500 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr); 3501 PetscFunctionReturn(0); 3502 } 3503 3504 #undef __FUNCT__ 3505 #define __FUNCT__ "MatCreateMPIAIJWithArrays" 3506 /*@ 3507 MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard 3508 CSR format the local rows. 3509 3510 Collective on MPI_Comm 3511 3512 Input Parameters: 3513 + comm - MPI communicator 3514 . m - number of local rows (Cannot be PETSC_DECIDE) 3515 . n - This value should be the same as the local size used in creating the 3516 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3517 calculated if N is given) For square matrices n is almost always m. 3518 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3519 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3520 . i - row indices 3521 . j - column indices 3522 - a - matrix values 3523 3524 Output Parameter: 3525 . mat - the matrix 3526 3527 Level: intermediate 3528 3529 Notes: 3530 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3531 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3532 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 3533 3534 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 
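   A minimal calling sketch (the names mlocal, N, i, j, and a below are placeholders for
   each process's own local CSR data, not values taken from this source file); the worked
   example that follows gives concrete per-process arrays:
.vb
      Mat A;
      /* every process passes the CSR arrays describing only its own rows */
      ierr = MatCreateMPIAIJWithArrays(PETSC_COMM_WORLD,mlocal,PETSC_DECIDE,PETSC_DETERMINE,N,i,j,a,&A);CHKERRQ(ierr);
.ve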
3535 3536 The format which is used for the sparse matrix input, is equivalent to a 3537 row-major ordering.. i.e for the following matrix, the input data expected is 3538 as shown 3539 3540 $ 1 0 0 3541 $ 2 0 3 P0 3542 $ ------- 3543 $ 4 5 6 P1 3544 $ 3545 $ Process0 [P0]: rows_owned=[0,1] 3546 $ i = {0,1,3} [size = nrow+1 = 2+1] 3547 $ j = {0,0,2} [size = 3] 3548 $ v = {1,2,3} [size = 3] 3549 $ 3550 $ Process1 [P1]: rows_owned=[2] 3551 $ i = {0,3} [size = nrow+1 = 1+1] 3552 $ j = {0,1,2} [size = 3] 3553 $ v = {4,5,6} [size = 3] 3554 3555 .keywords: matrix, aij, compressed row, sparse, parallel 3556 3557 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3558 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays() 3559 @*/ 3560 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 3561 { 3562 PetscErrorCode ierr; 3563 3564 PetscFunctionBegin; 3565 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3566 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3567 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3568 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 3569 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 3570 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3571 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 3572 PetscFunctionReturn(0); 3573 } 3574 3575 #undef __FUNCT__ 3576 #define __FUNCT__ "MatCreateAIJ" 3577 /*@C 3578 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 3579 (the default parallel PETSc format). For good matrix assembly performance 3580 the user should preallocate the matrix storage by setting the parameters 3581 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 3582 performance can be increased by more than a factor of 50. 3583 3584 Collective on MPI_Comm 3585 3586 Input Parameters: 3587 + comm - MPI communicator 3588 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 3589 This value should be the same as the local size used in creating the 3590 y vector for the matrix-vector product y = Ax. 3591 . n - This value should be the same as the local size used in creating the 3592 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 3593 calculated if N is given) For square matrices n is almost always m. 3594 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 3595 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 3596 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 3597 (same value is used for all local rows) 3598 . d_nnz - array containing the number of nonzeros in the various rows of the 3599 DIAGONAL portion of the local submatrix (possibly different for each row) 3600 or NULL, if d_nz is used to specify the nonzero structure. 3601 The size of this array is equal to the number of local rows, i.e 'm'. 3602 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 3603 submatrix (same value is used for all local rows). 3604 - o_nnz - array containing the number of nonzeros in the various rows of the 3605 OFF-DIAGONAL portion of the local submatrix (possibly different for 3606 each row) or NULL, if o_nz is used to specify the nonzero 3607 structure. 
The size of this array is equal to the number 3608 of local rows, i.e 'm'. 3609 3610 Output Parameter: 3611 . A - the matrix 3612 3613 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3614 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3615 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3616 3617 Notes: 3618 If the *_nnz parameter is given then the *_nz parameter is ignored 3619 3620 m,n,M,N parameters specify the size of the matrix, and its partitioning across 3621 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 3622 storage requirements for this matrix. 3623 3624 If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one 3625 processor than it must be used on all processors that share the object for 3626 that argument. 3627 3628 The user MUST specify either the local or global matrix dimensions 3629 (possibly both). 3630 3631 The parallel matrix is partitioned across processors such that the 3632 first m0 rows belong to process 0, the next m1 rows belong to 3633 process 1, the next m2 rows belong to process 2 etc.. where 3634 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 3635 values corresponding to [m x N] submatrix. 3636 3637 The columns are logically partitioned with the n0 columns belonging 3638 to 0th partition, the next n1 columns belonging to the next 3639 partition etc.. where n0,n1,n2... are the input parameter 'n'. 3640 3641 The DIAGONAL portion of the local submatrix on any given processor 3642 is the submatrix corresponding to the rows and columns m,n 3643 corresponding to the given processor. i.e diagonal matrix on 3644 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 3645 etc. The remaining portion of the local submatrix [m x (N-n)] 3646 constitute the OFF-DIAGONAL portion. The example below better 3647 illustrates this concept. 3648 3649 For a square global matrix we define each processor's diagonal portion 3650 to be its local rows and the corresponding columns (a square submatrix); 3651 each processor's off-diagonal portion encompasses the remainder of the 3652 local matrix (a rectangular submatrix). 3653 3654 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 3655 3656 When calling this routine with a single process communicator, a matrix of 3657 type SEQAIJ is returned. If a matrix of type MATMPIAIJ is desired for this 3658 type of communicator, use the construction mechanism: 3659 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 3660 3661 By default, this format uses inodes (identical nodes) when possible. 3662 We search for consecutive rows with the same nonzero structure, thereby 3663 reusing matrix information to achieve increased efficiency. 3664 3665 Options Database Keys: 3666 + -mat_no_inode - Do not use inodes 3667 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 3668 - -mat_aij_oneindex - Internally use indexing starting at 1 3669 rather than 0. Note that when calling MatSetValues(), 3670 the user still MUST index entries starting at 0! 3671 3672 3673 Example usage: 3674 3675 Consider the following 8x8 matrix with 34 non-zero values, that is 3676 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 3677 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown 3678 as follows: 3679 3680 .vb 3681 1 2 0 | 0 3 0 | 0 4 3682 Proc0 0 5 6 | 7 0 0 | 8 0 3683 9 0 10 | 11 0 0 | 12 0 3684 ------------------------------------- 3685 13 0 14 | 15 16 17 | 0 0 3686 Proc1 0 18 0 | 19 20 21 | 0 0 3687 0 0 0 | 22 23 0 | 24 0 3688 ------------------------------------- 3689 Proc2 25 26 27 | 0 0 28 | 29 0 3690 30 0 0 | 31 32 33 | 0 34 3691 .ve 3692 3693 This can be represented as a collection of submatrices as: 3694 3695 .vb 3696 A B C 3697 D E F 3698 G H I 3699 .ve 3700 3701 Where the submatrices A,B,C are owned by proc0, D,E,F are 3702 owned by proc1, G,H,I are owned by proc2. 3703 3704 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3705 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 3706 The 'M','N' parameters are 8,8, and have the same values on all procs. 3707 3708 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 3709 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 3710 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 3711 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 3712 part as SeqAIJ matrices, e.g., proc1 will store [E] as a SeqAIJ 3713 matrix, and [DF] as another SeqAIJ matrix. 3714 3715 When the d_nz, o_nz parameters are specified, d_nz storage elements are 3716 allocated for every row of the local diagonal submatrix, and o_nz 3717 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 3718 One way to choose d_nz and o_nz is to use the maximum number of nonzeros per 3719 local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 3720 In this case, the values of d_nz,o_nz are: 3721 .vb 3722 proc0 : d_nz = 2, o_nz = 2 3723 proc1 : d_nz = 3, o_nz = 2 3724 proc2 : d_nz = 1, o_nz = 4 3725 .ve 3726 We are allocating m*(d_nz+o_nz) storage locations for every proc. This 3727 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 3728 for proc2, i.e. we are using 12+15+10=37 storage locations to store 3729 34 values. 3730 3731 When the d_nnz, o_nnz parameters are specified, the storage is specified 3732 for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices. 3733 In the above case the values for d_nnz,o_nnz are: 3734 .vb 3735 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 3736 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 3737 proc2: d_nnz = [1,1] and o_nnz = [4,4] 3738 .ve 3739 Here the space allocated is the sum of all the above values, i.e. 34, and 3740 hence the preallocation is perfect.
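   As a concrete calling sketch for the example above (a hypothetical snippet, not taken
   from the PETSc sources), proc0 could create the matrix with its per-row counts; the
   other processes pass their own values from the table above:
.vb
      Mat      A;
      PetscInt d_nnz[] = {2,2,2},o_nnz[] = {2,2,2};  /* the proc0 values listed above */
      ierr = MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve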
3741 3742 Level: intermediate 3743 3744 .keywords: matrix, aij, compressed row, sparse, parallel 3745 3746 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 3747 MATMPIAIJ, MatCreateMPIAIJWithArrays() 3748 @*/ 3749 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 3750 { 3751 PetscErrorCode ierr; 3752 PetscMPIInt size; 3753 3754 PetscFunctionBegin; 3755 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3756 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 3757 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3758 if (size > 1) { 3759 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 3760 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 3761 } else { 3762 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 3763 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 3764 } 3765 PetscFunctionReturn(0); 3766 } 3767 3768 #undef __FUNCT__ 3769 #define __FUNCT__ "MatMPIAIJGetSeqAIJ" 3770 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 3771 { 3772 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 3773 PetscBool flg; 3774 PetscErrorCode ierr; 3775 3776 PetscFunctionBegin; 3777 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr); 3778 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 3779 if (Ad) *Ad = a->A; 3780 if (Ao) *Ao = a->B; 3781 if (colmap) *colmap = a->garray; 3782 PetscFunctionReturn(0); 3783 } 3784 3785 #undef __FUNCT__ 3786 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIAIJ" 3787 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3788 { 3789 PetscErrorCode ierr; 3790 PetscInt m,N,i,rstart,nnz,Ii; 3791 PetscInt *indx; 3792 PetscScalar *values; 3793 3794 PetscFunctionBegin; 3795 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 3796 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3797 PetscInt *dnz,*onz,sum,bs,cbs; 3798 3799 if (n == PETSC_DECIDE) { 3800 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 3801 } 3802 /* Check sum(n) = N */ 3803 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3804 if (sum != N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",N); 3805 3806 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3807 rstart -= m; 3808 3809 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 3810 for (i=0; i<m; i++) { 3811 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3812 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 3813 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 3814 } 3815 3816 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 3817 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 3818 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 3819 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 3820 ierr = MatSetType(*outmat,MATMPIAIJ);CHKERRQ(ierr); 3821 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 3822 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 3823 } 3824 3825 /* numeric phase */ 3826 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 3827 for (i=0; i<m; i++) { 3828 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3829 Ii = i + rstart; 3830 
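/* insert row i of inmat into the concatenated MPIAIJ matrix at its global row index Ii (numeric phase) */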
ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3831 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 3832 } 3833 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3834 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3835 PetscFunctionReturn(0); 3836 } 3837 3838 #undef __FUNCT__ 3839 #define __FUNCT__ "MatFileSplit" 3840 PetscErrorCode MatFileSplit(Mat A,char *outfile) 3841 { 3842 PetscErrorCode ierr; 3843 PetscMPIInt rank; 3844 PetscInt m,N,i,rstart,nnz; 3845 size_t len; 3846 const PetscInt *indx; 3847 PetscViewer out; 3848 char *name; 3849 Mat B; 3850 const PetscScalar *values; 3851 3852 PetscFunctionBegin; 3853 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 3854 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 3855 /* Should this be the type of the diagonal block of A? */ 3856 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 3857 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 3858 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 3859 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 3860 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 3861 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 3862 for (i=0; i<m; i++) { 3863 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3864 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 3865 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 3866 } 3867 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3868 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3869 3870 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 3871 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 3872 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 3873 sprintf(name,"%s.%d",outfile,rank); 3874 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 3875 ierr = PetscFree(name);CHKERRQ(ierr); 3876 ierr = MatView(B,out);CHKERRQ(ierr); 3877 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 3878 ierr = MatDestroy(&B);CHKERRQ(ierr); 3879 PetscFunctionReturn(0); 3880 } 3881 3882 extern PetscErrorCode MatDestroy_MPIAIJ(Mat); 3883 #undef __FUNCT__ 3884 #define __FUNCT__ "MatDestroy_MPIAIJ_SeqsToMPI" 3885 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 3886 { 3887 PetscErrorCode ierr; 3888 Mat_Merge_SeqsToMPI *merge; 3889 PetscContainer container; 3890 3891 PetscFunctionBegin; 3892 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3893 if (container) { 3894 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3895 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 3896 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 3897 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 3898 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 3899 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 3900 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 3901 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 3902 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 3903 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 3904 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 3905 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 3906 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 3907 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 3908 ierr = PetscFree(merge);CHKERRQ(ierr); 3909 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 3910 } 3911 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 3912 PetscFunctionReturn(0); 3913 } 3914 3915 
#include <../src/mat/utils/freespace.h> 3916 #include <petscbt.h> 3917 3918 #undef __FUNCT__ 3919 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJNumeric" 3920 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 3921 { 3922 PetscErrorCode ierr; 3923 MPI_Comm comm; 3924 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 3925 PetscMPIInt size,rank,taga,*len_s; 3926 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 3927 PetscInt proc,m; 3928 PetscInt **buf_ri,**buf_rj; 3929 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 3930 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 3931 MPI_Request *s_waits,*r_waits; 3932 MPI_Status *status; 3933 MatScalar *aa=a->a; 3934 MatScalar **abuf_r,*ba_i; 3935 Mat_Merge_SeqsToMPI *merge; 3936 PetscContainer container; 3937 3938 PetscFunctionBegin; 3939 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 3940 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 3941 3942 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3943 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3944 3945 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 3946 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 3947 3948 bi = merge->bi; 3949 bj = merge->bj; 3950 buf_ri = merge->buf_ri; 3951 buf_rj = merge->buf_rj; 3952 3953 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 3954 owners = merge->rowmap->range; 3955 len_s = merge->len_s; 3956 3957 /* send and recv matrix values */ 3958 /*-----------------------------*/ 3959 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 3960 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 3961 3962 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 3963 for (proc=0,k=0; proc<size; proc++) { 3964 if (!len_s[proc]) continue; 3965 i = owners[proc]; 3966 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 3967 k++; 3968 } 3969 3970 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 3971 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 3972 ierr = PetscFree(status);CHKERRQ(ierr); 3973 3974 ierr = PetscFree(s_waits);CHKERRQ(ierr); 3975 ierr = PetscFree(r_waits);CHKERRQ(ierr); 3976 3977 /* insert mat values of mpimat */ 3978 /*----------------------------*/ 3979 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 3980 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 3981 3982 for (k=0; k<merge->nrecv; k++) { 3983 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 3984 nrows = *(buf_ri_k[k]); 3985 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 3986 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 3987 } 3988 3989 /* set values of ba */ 3990 m = merge->rowmap->n; 3991 for (i=0; i<m; i++) { 3992 arow = owners[rank] + i; 3993 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 3994 bnzi = bi[i+1] - bi[i]; 3995 ierr = PetscMemzero(ba_i,bnzi*sizeof(PetscScalar));CHKERRQ(ierr); 3996 3997 /* add local non-zero vals of this proc's seqmat into ba */ 3998 anzi = ai[arow+1] - ai[arow]; 3999 aj = a->j + ai[arow]; 4000 aa = a->a + ai[arow]; 4001 nextaj = 0; 4002 for (j=0; nextaj<anzi; j++) { 4003 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4004 ba_i[j] += aa[nextaj++]; 4005 } 4006 } 4007 4008 /* add received 
vals into ba */ 4009 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4010 /* i-th row */ 4011 if (i == *nextrow[k]) { 4012 anzi = *(nextai[k]+1) - *nextai[k]; 4013 aj = buf_rj[k] + *(nextai[k]); 4014 aa = abuf_r[k] + *(nextai[k]); 4015 nextaj = 0; 4016 for (j=0; nextaj<anzi; j++) { 4017 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4018 ba_i[j] += aa[nextaj++]; 4019 } 4020 } 4021 nextrow[k]++; nextai[k]++; 4022 } 4023 } 4024 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4025 } 4026 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4027 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4028 4029 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4030 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4031 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4032 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4033 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4034 PetscFunctionReturn(0); 4035 } 4036 4037 extern PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat); 4038 4039 #undef __FUNCT__ 4040 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJSymbolic" 4041 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4042 { 4043 PetscErrorCode ierr; 4044 Mat B_mpi; 4045 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4046 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4047 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4048 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4049 PetscInt len,proc,*dnz,*onz,bs,cbs; 4050 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4051 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4052 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4053 MPI_Status *status; 4054 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4055 PetscBT lnkbt; 4056 Mat_Merge_SeqsToMPI *merge; 4057 PetscContainer container; 4058 4059 PetscFunctionBegin; 4060 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4061 4062 /* make sure it is a PETSc comm */ 4063 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4064 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4065 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4066 4067 ierr = PetscNew(&merge);CHKERRQ(ierr); 4068 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4069 4070 /* determine row ownership */ 4071 /*---------------------------------------------------------*/ 4072 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4073 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4074 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4075 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4076 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4077 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4078 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4079 4080 m = merge->rowmap->n; 4081 owners = merge->rowmap->range; 4082 4083 /* determine the number of messages to send, their lengths */ 4084 /*---------------------------------------------------------*/ 4085 len_s = merge->len_s; 4086 4087 len = 0; /* length of buf_si[] */ 4088 merge->nsend = 0; 4089 for (proc=0; proc<size; proc++) { 4090 len_si[proc] = 0; 4091 if (proc == rank) { 4092 len_s[proc] = 0; 4093 } else { 4094 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4095 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4096 } 4097 if (len_s[proc]) { 4098 merge->nsend++; 4099 nrows = 0; 4100 for 
(i=owners[proc]; i<owners[proc+1]; i++) { 4101 if (ai[i+1] > ai[i]) nrows++; 4102 } 4103 len_si[proc] = 2*(nrows+1); 4104 len += len_si[proc]; 4105 } 4106 } 4107 4108 /* determine the number and length of messages to receive for ij-structure */ 4109 /*-------------------------------------------------------------------------*/ 4110 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4111 ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4112 4113 /* post the Irecv of j-structure */ 4114 /*-------------------------------*/ 4115 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4116 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4117 4118 /* post the Isend of j-structure */ 4119 /*--------------------------------*/ 4120 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4121 4122 for (proc=0, k=0; proc<size; proc++) { 4123 if (!len_s[proc]) continue; 4124 i = owners[proc]; 4125 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4126 k++; 4127 } 4128 4129 /* receives and sends of j-structure are complete */ 4130 /*------------------------------------------------*/ 4131 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4132 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4133 4134 /* send and recv i-structure */ 4135 /*---------------------------*/ 4136 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4137 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4138 4139 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4140 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4141 for (proc=0,k=0; proc<size; proc++) { 4142 if (!len_s[proc]) continue; 4143 /* form outgoing message for i-structure: 4144 buf_si[0]: nrows to be sent 4145 [1:nrows]: row index (global) 4146 [nrows+1:2*nrows+1]: i-structure index 4147 */ 4148 /*-------------------------------------------*/ 4149 nrows = len_si[proc]/2 - 1; 4150 buf_si_i = buf_si + nrows+1; 4151 buf_si[0] = nrows; 4152 buf_si_i[0] = 0; 4153 nrows = 0; 4154 for (i=owners[proc]; i<owners[proc+1]; i++) { 4155 anzi = ai[i+1] - ai[i]; 4156 if (anzi) { 4157 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4158 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4159 nrows++; 4160 } 4161 } 4162 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4163 k++; 4164 buf_si += len_si[proc]; 4165 } 4166 4167 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4168 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4169 4170 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4171 for (i=0; i<merge->nrecv; i++) { 4172 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4173 } 4174 4175 ierr = PetscFree(len_si);CHKERRQ(ierr); 4176 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4177 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4178 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4179 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4180 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4181 ierr = PetscFree(status);CHKERRQ(ierr); 4182 4183 /* compute a local seq matrix in each processor */ 4184 
/*----------------------------------------------*/ 4185 /* allocate bi array and free space for accumulating nonzero column info */ 4186 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4187 bi[0] = 0; 4188 4189 /* create and initialize a linked list */ 4190 nlnk = N+1; 4191 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4192 4193 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4194 len = ai[owners[rank+1]] - ai[owners[rank]]; 4195 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4196 4197 current_space = free_space; 4198 4199 /* determine symbolic info for each local row */ 4200 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4201 4202 for (k=0; k<merge->nrecv; k++) { 4203 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4204 nrows = *buf_ri_k[k]; 4205 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4206 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4207 } 4208 4209 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4210 len = 0; 4211 for (i=0; i<m; i++) { 4212 bnzi = 0; 4213 /* add local non-zero cols of this proc's seqmat into lnk */ 4214 arow = owners[rank] + i; 4215 anzi = ai[arow+1] - ai[arow]; 4216 aj = a->j + ai[arow]; 4217 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4218 bnzi += nlnk; 4219 /* add received col data into lnk */ 4220 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4221 if (i == *nextrow[k]) { /* i-th row */ 4222 anzi = *(nextai[k]+1) - *nextai[k]; 4223 aj = buf_rj[k] + *nextai[k]; 4224 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4225 bnzi += nlnk; 4226 nextrow[k]++; nextai[k]++; 4227 } 4228 } 4229 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4230 4231 /* if free space is not available, make more free space */ 4232 if (current_space->local_remaining<bnzi) { 4233 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr); 4234 nspacedouble++; 4235 } 4236 /* copy data into free space, then initialize lnk */ 4237 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4238 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4239 4240 current_space->array += bnzi; 4241 current_space->local_used += bnzi; 4242 current_space->local_remaining -= bnzi; 4243 4244 bi[i+1] = bi[i] + bnzi; 4245 } 4246 4247 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4248 4249 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4250 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4251 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4252 4253 /* create symbolic parallel matrix B_mpi */ 4254 /*---------------------------------------*/ 4255 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4256 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4257 if (n==PETSC_DECIDE) { 4258 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4259 } else { 4260 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4261 } 4262 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4263 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4264 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4265 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4266 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 4267 4268 /* B_mpi is not ready
for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4269 B_mpi->assembled = PETSC_FALSE; 4270 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 4271 merge->bi = bi; 4272 merge->bj = bj; 4273 merge->buf_ri = buf_ri; 4274 merge->buf_rj = buf_rj; 4275 merge->coi = NULL; 4276 merge->coj = NULL; 4277 merge->owners_co = NULL; 4278 4279 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 4280 4281 /* attach the supporting struct to B_mpi for reuse */ 4282 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 4283 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 4284 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 4285 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 4286 *mpimat = B_mpi; 4287 4288 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4289 PetscFunctionReturn(0); 4290 } 4291 4292 #undef __FUNCT__ 4293 #define __FUNCT__ "MatCreateMPIAIJSumSeqAIJ" 4294 /*@C 4295 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding the sequential 4296 matrices from each processor 4297 4298 Collective on MPI_Comm 4299 4300 Input Parameters: 4301 + comm - the communicator the parallel matrix will live on 4302 . seqmat - the input sequential matrix 4303 . m - number of local rows (or PETSC_DECIDE) 4304 . n - number of local columns (or PETSC_DECIDE) 4305 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4306 4307 Output Parameter: 4308 . mpimat - the parallel matrix generated 4309 4310 Level: advanced 4311 4312 Notes: 4313 The dimensions of the sequential matrix in each processor MUST be the same. 4314 The input seqmat is included in the container "Mat_Merge_SeqsToMPI", and will be 4315 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 4316 @*/ 4317 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 4318 { 4319 PetscErrorCode ierr; 4320 PetscMPIInt size; 4321 4322 PetscFunctionBegin; 4323 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4324 if (size == 1) { 4325 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4326 if (scall == MAT_INITIAL_MATRIX) { 4327 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 4328 } else { 4329 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4330 } 4331 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4332 PetscFunctionReturn(0); 4333 } 4334 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4335 if (scall == MAT_INITIAL_MATRIX) { 4336 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 4337 } 4338 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 4339 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 4340 PetscFunctionReturn(0); 4341 } 4342 4343 #undef __FUNCT__ 4344 #define __FUNCT__ "MatMPIAIJGetLocalMat" 4345 /*@ 4346 MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 4347 mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 4348 with MatGetSize(). 4349 4350 Not Collective 4351 4352 Input Parameters: 4353 + A - the matrix 4354 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4355 4356 Output Parameter: 4357 .
A_loc - the local sequential matrix generated 4358 4359 Level: developer 4360 4361 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 4362 4363 @*/ 4364 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 4365 { 4366 PetscErrorCode ierr; 4367 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 4368 Mat_SeqAIJ *mat,*a,*b; 4369 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 4370 MatScalar *aa,*ba,*cam; 4371 PetscScalar *ca; 4372 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 4373 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 4374 PetscBool match; 4375 MPI_Comm comm; 4376 PetscMPIInt size; 4377 4378 PetscFunctionBegin; 4379 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4380 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4381 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4382 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4383 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 4384 4385 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4386 a = (Mat_SeqAIJ*)(mpimat->A)->data; 4387 b = (Mat_SeqAIJ*)(mpimat->B)->data; 4388 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 4389 aa = a->a; ba = b->a; 4390 if (scall == MAT_INITIAL_MATRIX) { 4391 if (size == 1) { 4392 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 4393 PetscFunctionReturn(0); 4394 } 4395 4396 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 4397 ci[0] = 0; 4398 for (i=0; i<am; i++) { 4399 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 4400 } 4401 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 4402 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 4403 k = 0; 4404 for (i=0; i<am; i++) { 4405 ncols_o = bi[i+1] - bi[i]; 4406 ncols_d = ai[i+1] - ai[i]; 4407 /* off-diagonal portion of A */ 4408 for (jo=0; jo<ncols_o; jo++) { 4409 col = cmap[*bj]; 4410 if (col >= cstart) break; 4411 cj[k] = col; bj++; 4412 ca[k++] = *ba++; 4413 } 4414 /* diagonal portion of A */ 4415 for (j=0; j<ncols_d; j++) { 4416 cj[k] = cstart + *aj++; 4417 ca[k++] = *aa++; 4418 } 4419 /* off-diagonal portion of A */ 4420 for (j=jo; j<ncols_o; j++) { 4421 cj[k] = cmap[*bj++]; 4422 ca[k++] = *ba++; 4423 } 4424 } 4425 /* put together the new matrix */ 4426 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 4427 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4428 /* Since these are PETSc arrays, change flags to free them as necessary.
*/ 4429 mat = (Mat_SeqAIJ*)(*A_loc)->data; 4430 mat->free_a = PETSC_TRUE; 4431 mat->free_ij = PETSC_TRUE; 4432 mat->nonew = 0; 4433 } else if (scall == MAT_REUSE_MATRIX) { 4434 mat=(Mat_SeqAIJ*)(*A_loc)->data; 4435 ci = mat->i; cj = mat->j; cam = mat->a; 4436 for (i=0; i<am; i++) { 4437 /* off-diagonal portion of A */ 4438 ncols_o = bi[i+1] - bi[i]; 4439 for (jo=0; jo<ncols_o; jo++) { 4440 col = cmap[*bj]; 4441 if (col >= cstart) break; 4442 *cam++ = *ba++; bj++; 4443 } 4444 /* diagonal portion of A */ 4445 ncols_d = ai[i+1] - ai[i]; 4446 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 4447 /* off-diagonal portion of A */ 4448 for (j=jo; j<ncols_o; j++) { 4449 *cam++ = *ba++; bj++; 4450 } 4451 } 4452 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 4453 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 4454 PetscFunctionReturn(0); 4455 } 4456 4457 #undef __FUNCT__ 4458 #define __FUNCT__ "MatMPIAIJGetLocalMatCondensed" 4459 /*@C 4460 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 4461 4462 Not Collective 4463 4464 Input Parameters: 4465 + A - the matrix 4466 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4467 - row, col - index sets of rows and columns to extract (or NULL) 4468 4469 Output Parameter: 4470 . A_loc - the local sequential matrix generated 4471 4472 Level: developer 4473 4474 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 4475 4476 @*/ 4477 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 4478 { 4479 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4480 PetscErrorCode ierr; 4481 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 4482 IS isrowa,iscola; 4483 Mat *aloc; 4484 PetscBool match; 4485 4486 PetscFunctionBegin; 4487 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 4488 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 4489 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4490 if (!row) { 4491 start = A->rmap->rstart; end = A->rmap->rend; 4492 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 4493 } else { 4494 isrowa = *row; 4495 } 4496 if (!col) { 4497 start = A->cmap->rstart; 4498 cmap = a->garray; 4499 nzA = a->A->cmap->n; 4500 nzB = a->B->cmap->n; 4501 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4502 ncols = 0; 4503 for (i=0; i<nzB; i++) { 4504 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4505 else break; 4506 } 4507 imark = i; 4508 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 4509 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 4510 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 4511 } else { 4512 iscola = *col; 4513 } 4514 if (scall != MAT_INITIAL_MATRIX) { 4515 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 4516 aloc[0] = *A_loc; 4517 } 4518 ierr = MatGetSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 4519 *A_loc = aloc[0]; 4520 ierr = PetscFree(aloc);CHKERRQ(ierr); 4521 if (!row) { 4522 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 4523 } 4524 if (!col) { 4525 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 4526 } 4527 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 4528 PetscFunctionReturn(0); 4529 } 4530 4531 #undef __FUNCT__ 4532 #define __FUNCT__ "MatGetBrowsOfAcols" 4533 /*@C 4534 MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to 
nonzero columns of local A 4535 4536 Collective on Mat 4537 4538 Input Parameters: 4539 + A,B - the matrices in mpiaij format 4540 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4541 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 4542 4543 Output Parameter: 4544 + rowb, colb - index sets of rows and columns of B to extract 4545 - B_seq - the sequential matrix generated 4546 4547 Level: developer 4548 4549 @*/ 4550 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 4551 { 4552 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4553 PetscErrorCode ierr; 4554 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 4555 IS isrowb,iscolb; 4556 Mat *bseq=NULL; 4557 4558 PetscFunctionBegin; 4559 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4560 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4561 } 4562 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4563 4564 if (scall == MAT_INITIAL_MATRIX) { 4565 start = A->cmap->rstart; 4566 cmap = a->garray; 4567 nzA = a->A->cmap->n; 4568 nzB = a->B->cmap->n; 4569 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 4570 ncols = 0; 4571 for (i=0; i<nzB; i++) { /* row < local row index */ 4572 if (cmap[i] < start) idx[ncols++] = cmap[i]; 4573 else break; 4574 } 4575 imark = i; 4576 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 4577 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 4578 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 4579 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 4580 } else { 4581 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 4582 isrowb = *rowb; iscolb = *colb; 4583 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 4584 bseq[0] = *B_seq; 4585 } 4586 ierr = MatGetSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 4587 *B_seq = bseq[0]; 4588 ierr = PetscFree(bseq);CHKERRQ(ierr); 4589 if (!rowb) { 4590 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 4591 } else { 4592 *rowb = isrowb; 4593 } 4594 if (!colb) { 4595 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 4596 } else { 4597 *colb = iscolb; 4598 } 4599 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 4600 PetscFunctionReturn(0); 4601 } 4602 4603 #undef __FUNCT__ 4604 #define __FUNCT__ "MatGetBrowsOfAoCols_MPIAIJ" 4605 /* 4606 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 4607 of the OFF-DIAGONAL portion of local A 4608 4609 Collective on Mat 4610 4611 Input Parameters: 4612 + A,B - the matrices in mpiaij format 4613 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4614 4615 Output Parameter: 4616 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 4617 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 4618 . 
bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 4619 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 4620 4621 Level: developer 4622 4623 */ 4624 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 4625 { 4626 VecScatter_MPI_General *gen_to,*gen_from; 4627 PetscErrorCode ierr; 4628 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 4629 Mat_SeqAIJ *b_oth; 4630 VecScatter ctx =a->Mvctx; 4631 MPI_Comm comm; 4632 PetscMPIInt *rprocs,*sprocs,tag=((PetscObject)ctx)->tag,rank; 4633 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj; 4634 PetscScalar *rvalues,*svalues; 4635 MatScalar *b_otha,*bufa,*bufA; 4636 PetscInt i,j,k,l,ll,nrecvs,nsends,nrows,*srow,*rstarts,*rstartsj = 0,*sstarts,*sstartsj,len; 4637 MPI_Request *rwaits = NULL,*swaits = NULL; 4638 MPI_Status *sstatus,rstatus; 4639 PetscMPIInt jj,size; 4640 PetscInt *cols,sbs,rbs; 4641 PetscScalar *vals; 4642 4643 PetscFunctionBegin; 4644 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 4645 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4646 4647 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 4648 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 4649 } 4650 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4651 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4652 4653 gen_to = (VecScatter_MPI_General*)ctx->todata; 4654 gen_from = (VecScatter_MPI_General*)ctx->fromdata; 4655 rvalues = gen_from->values; /* holds the length of receiving row */ 4656 svalues = gen_to->values; /* holds the length of sending row */ 4657 nrecvs = gen_from->n; 4658 nsends = gen_to->n; 4659 4660 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 4661 srow = gen_to->indices; /* local row index to be sent */ 4662 sstarts = gen_to->starts; 4663 sprocs = gen_to->procs; 4664 sstatus = gen_to->sstatus; 4665 sbs = gen_to->bs; 4666 rstarts = gen_from->starts; 4667 rprocs = gen_from->procs; 4668 rbs = gen_from->bs; 4669 4670 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 4671 if (scall == MAT_INITIAL_MATRIX) { 4672 /* i-array */ 4673 /*---------*/ 4674 /* post receives */ 4675 for (i=0; i<nrecvs; i++) { 4676 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4677 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 4678 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4679 } 4680 4681 /* pack the outgoing message */ 4682 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 4683 4684 sstartsj[0] = 0; 4685 rstartsj[0] = 0; 4686 len = 0; /* total length of j or a array to be sent */ 4687 k = 0; 4688 for (i=0; i<nsends; i++) { 4689 rowlen = (PetscInt*)svalues + sstarts[i]*sbs; 4690 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4691 for (j=0; j<nrows; j++) { 4692 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 4693 for (l=0; l<sbs; l++) { 4694 ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 4695 4696 rowlen[j*sbs+l] = ncols; 4697 4698 len += ncols; 4699 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 4700 } 4701 k++; 4702 } 4703 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4704 4705 sstartsj[i+1] = len; /* starting point of 
(i+1)-th outgoing msg in bufj and bufa */ 4706 } 4707 /* recvs and sends of i-array are completed */ 4708 i = nrecvs; 4709 while (i--) { 4710 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4711 } 4712 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4713 4714 /* allocate buffers for sending j and a arrays */ 4715 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 4716 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 4717 4718 /* create i-array of B_oth */ 4719 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 4720 4721 b_othi[0] = 0; 4722 len = 0; /* total length of j or a array to be received */ 4723 k = 0; 4724 for (i=0; i<nrecvs; i++) { 4725 rowlen = (PetscInt*)rvalues + rstarts[i]*rbs; 4726 nrows = rbs*(rstarts[i+1]-rstarts[i]); /* num of rows to be received */ 4727 for (j=0; j<nrows; j++) { 4728 b_othi[k+1] = b_othi[k] + rowlen[j]; 4729 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 4730 k++; 4731 } 4732 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 4733 } 4734 4735 /* allocate space for j and a arrays of B_oth */ 4736 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 4737 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 4738 4739 /* j-array */ 4740 /*---------*/ 4741 /* post receives of j-array */ 4742 for (i=0; i<nrecvs; i++) { 4743 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4744 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4745 } 4746 4747 /* pack the outgoing message j-array */ 4748 k = 0; 4749 for (i=0; i<nsends; i++) { 4750 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4751 bufJ = bufj+sstartsj[i]; 4752 for (j=0; j<nrows; j++) { 4753 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4754 for (ll=0; ll<sbs; ll++) { 4755 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4756 for (l=0; l<ncols; l++) { 4757 *bufJ++ = cols[l]; 4758 } 4759 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 4760 } 4761 } 4762 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4763 } 4764 4765 /* recvs and sends of j-array are completed */ 4766 i = nrecvs; 4767 while (i--) { 4768 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4769 } 4770 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4771 } else if (scall == MAT_REUSE_MATRIX) { 4772 sstartsj = *startsj_s; 4773 rstartsj = *startsj_r; 4774 bufa = *bufa_ptr; 4775 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4776 b_otha = b_oth->a; 4777 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 4778 4779 /* a-array */ 4780 /*---------*/ 4781 /* post receives of a-array */ 4782 for (i=0; i<nrecvs; i++) { 4783 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 4784 ierr = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 4785 } 4786 4787 /* pack the outgoing message a-array */ 4788 k = 0; 4789 for (i=0; i<nsends; i++) { 4790 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 4791 bufA = bufa+sstartsj[i]; 4792 for (j=0; j<nrows; j++) { 4793 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 4794 for (ll=0; ll<sbs; ll++) { 4795 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4796 for (l=0; l<ncols; l++) { 4797 *bufA++ = vals[l]; 4798 } 4799 ierr =
MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 4800 } 4801 } 4802 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 4803 } 4804 /* recvs and sends of a-array are completed */ 4805 i = nrecvs; 4806 while (i--) { 4807 ierr = MPI_Waitany(nrecvs,rwaits,&jj,&rstatus);CHKERRQ(ierr); 4808 } 4809 if (nsends) {ierr = MPI_Waitall(nsends,swaits,sstatus);CHKERRQ(ierr);} 4810 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 4811 4812 if (scall == MAT_INITIAL_MATRIX) { 4813 /* put together the new matrix */ 4814 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 4815 4816 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 4817 /* Since these are PETSc arrays, change flags to free them as necessary. */ 4818 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 4819 b_oth->free_a = PETSC_TRUE; 4820 b_oth->free_ij = PETSC_TRUE; 4821 b_oth->nonew = 0; 4822 4823 ierr = PetscFree(bufj);CHKERRQ(ierr); 4824 if (!startsj_s || !bufa_ptr) { 4825 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 4826 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 4827 } else { 4828 *startsj_s = sstartsj; 4829 *startsj_r = rstartsj; 4830 *bufa_ptr = bufa; 4831 } 4832 } 4833 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 4834 PetscFunctionReturn(0); 4835 } 4836 4837 #undef __FUNCT__ 4838 #define __FUNCT__ "MatGetCommunicationStructs" 4839 /*@C 4840 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 4841 4842 Not Collective 4843 4844 Input Parameters: 4845 . A - The matrix in mpiaij format 4846 4847 Output Parameter: 4848 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 4849 . 
colmap - A map from global column index to local index into lvec 4850 - multScatter - A scatter from the argument of a matrix-vector product to lvec 4851 4852 Level: developer 4853 4854 @*/ 4855 #if defined(PETSC_USE_CTABLE) 4856 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 4857 #else 4858 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 4859 #endif 4860 { 4861 Mat_MPIAIJ *a; 4862 4863 PetscFunctionBegin; 4864 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 4865 PetscValidPointer(lvec, 2); 4866 PetscValidPointer(colmap, 3); 4867 PetscValidPointer(multScatter, 4); 4868 a = (Mat_MPIAIJ*) A->data; 4869 if (lvec) *lvec = a->lvec; 4870 if (colmap) *colmap = a->colmap; 4871 if (multScatter) *multScatter = a->Mvctx; 4872 PetscFunctionReturn(0); 4873 } 4874 4875 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 4876 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 4877 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 4878 #if defined(PETSC_HAVE_ELEMENTAL) 4879 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 4880 #endif 4881 #if defined(PETSC_HAVE_HYPRE) 4882 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 4883 #endif 4884 4885 #undef __FUNCT__ 4886 #define __FUNCT__ "MatMatMultNumeric_MPIDense_MPIAIJ" 4887 /* 4888 Computes (B'*A')' since computing B*A directly is untenable 4889 4890 n p p 4891 ( ) ( ) ( ) 4892 m ( A ) * n ( B ) = m ( C ) 4893 ( ) ( ) ( ) 4894 4895 */ 4896 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 4897 { 4898 PetscErrorCode ierr; 4899 Mat At,Bt,Ct; 4900 4901 PetscFunctionBegin; 4902 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 4903 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 4904 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 4905 ierr = MatDestroy(&At);CHKERRQ(ierr); 4906 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 4907 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 4908 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 4909 PetscFunctionReturn(0); 4910 } 4911 4912 #undef __FUNCT__ 4913 #define __FUNCT__ "MatMatMultSymbolic_MPIDense_MPIAIJ" 4914 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 4915 { 4916 PetscErrorCode ierr; 4917 PetscInt m=A->rmap->n,n=B->cmap->n; 4918 Mat Cmat; 4919 4920 PetscFunctionBegin; 4921 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 4922 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 4923 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4924 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 4925 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 4926 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 4927 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4928 ierr = MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4929 4930 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 4931 4932 *C = Cmat; 4933 PetscFunctionReturn(0); 4934 } 4935 4936 /* ----------------------------------------------------------------*/ 4937 #undef __FUNCT__ 4938 #define __FUNCT__ "MatMatMult_MPIDense_MPIAIJ" 4939 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat 
*C) 4940 { 4941 PetscErrorCode ierr; 4942 4943 PetscFunctionBegin; 4944 if (scall == MAT_INITIAL_MATRIX) { 4945 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4946 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 4947 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 4948 } 4949 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4950 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 4951 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 4952 PetscFunctionReturn(0); 4953 } 4954 4955 /*MC 4956 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 4957 4958 Options Database Keys: 4959 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 4960 4961 Level: beginner 4962 4963 .seealso: MatCreateAIJ() 4964 M*/ 4965 4966 #undef __FUNCT__ 4967 #define __FUNCT__ "MatCreate_MPIAIJ" 4968 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 4969 { 4970 Mat_MPIAIJ *b; 4971 PetscErrorCode ierr; 4972 PetscMPIInt size; 4973 4974 PetscFunctionBegin; 4975 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 4976 4977 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 4978 B->data = (void*)b; 4979 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 4980 B->assembled = PETSC_FALSE; 4981 B->insertmode = NOT_SET_VALUES; 4982 b->size = size; 4983 4984 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 4985 4986 /* build cache for off array entries formed */ 4987 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 4988 4989 b->donotstash = PETSC_FALSE; 4990 b->colmap = 0; 4991 b->garray = 0; 4992 b->roworiented = PETSC_TRUE; 4993 4994 /* stuff used for matrix vector multiply */ 4995 b->lvec = NULL; 4996 b->Mvctx = NULL; 4997 4998 /* stuff for MatGetRow() */ 4999 b->rowindices = 0; 5000 b->rowvalues = 0; 5001 b->getrowactive = PETSC_FALSE; 5002 5003 /* flexible pointer used in CUSP/CUSPARSE classes */ 5004 b->spptr = NULL; 5005 5006 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5007 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5008 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5009 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5010 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5011 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5012 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5013 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr); 5014 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr); 5015 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr); 5016 #if defined(PETSC_HAVE_ELEMENTAL) 5017 ierr = 
PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr); 5018 #endif 5019 #if defined(PETSC_HAVE_HYPRE) 5020 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 5021 #endif 5022 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr); 5023 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr); 5024 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr); 5025 ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr); 5026 PetscFunctionReturn(0); 5027 } 5028 5029 #undef __FUNCT__ 5030 #define __FUNCT__ "MatCreateMPIAIJWithSplitArrays" 5031 /*@C 5032 MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal" 5033 and "off-diagonal" part of the matrix in CSR format. 5034 5035 Collective on MPI_Comm 5036 5037 Input Parameters: 5038 + comm - MPI communicator 5039 . m - number of local rows (Cannot be PETSC_DECIDE) 5040 . n - This value should be the same as the local size used in creating the 5041 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 5042 calculated if N is given) For square matrices n is almost always m. 5043 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 5044 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 5045 . i - row indices for "diagonal" portion of matrix 5046 . j - column indices 5047 . a - matrix values 5048 . oi - row indices for "off-diagonal" portion of matrix 5049 . oj - column indices 5050 - oa - matrix values 5051 5052 Output Parameter: 5053 . mat - the matrix 5054 5055 Level: advanced 5056 5057 Notes: 5058 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 5059 must free the arrays once the matrix has been destroyed and not before. 5060 5061 The i and j indices are 0 based 5062 5063 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 5064 5065 This sets local rows and cannot be used to set off-processor values. 5066 5067 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 5068 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 5069 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 5070 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 5071 keep track of the underlying array. Use MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all 5072 communication if it is known that only local entries will be set. 
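   Example (an illustrative sketch, not taken from a PETSc example): each process contributes one
   row of a global identity matrix, so the "diagonal" block holds a single entry and the
   "off-diagonal" block is empty; oj and oa are dummy one-entry arrays that are never read
   because oi reports zero off-diagonal entries in the row.

      PetscInt    i[2]  = {0,1},  j[1]  = {0};
      PetscInt    oi[2] = {0,0},  oj[1] = {0};
      PetscScalar a[1]  = {1.0},  oa[1] = {0.0};
      Mat         A;

      ierr = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);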
5073 5074 .keywords: matrix, aij, compressed row, sparse, parallel 5075 5076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 5077 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays() 5078 @*/ 5079 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat) 5080 { 5081 PetscErrorCode ierr; 5082 Mat_MPIAIJ *maij; 5083 5084 PetscFunctionBegin; 5085 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 5086 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 5087 if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0"); 5088 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 5089 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 5090 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 5091 maij = (Mat_MPIAIJ*) (*mat)->data; 5092 5093 (*mat)->preallocated = PETSC_TRUE; 5094 5095 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 5096 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 5097 5098 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr); 5099 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr); 5100 5101 ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5102 ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5103 ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5104 ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5105 5106 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5107 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5108 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 5109 PetscFunctionReturn(0); 5110 } 5111 5112 /* 5113 Special version for direct calls from Fortran 5114 */ 5115 #include <petsc/private/fortranimpl.h> 5116 5117 #if defined(PETSC_HAVE_FORTRAN_CAPS) 5118 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 5119 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 5120 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 5121 #endif 5122 5123 /* Change these macros so can be used in void function */ 5124 #undef CHKERRQ 5125 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr) 5126 #undef SETERRQ2 5127 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr) 5128 #undef SETERRQ3 5129 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr) 5130 #undef SETERRQ 5131 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr) 5132 5133 #undef __FUNCT__ 5134 #define __FUNCT__ "matsetvaluesmpiaij_" 5135 PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr) 5136 { 5137 Mat mat = *mmat; 5138 PetscInt m = *mm, n = *mn; 5139 InsertMode addv = *maddv; 5140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 5141 PetscScalar value; 5142 PetscErrorCode ierr; 5143 5144 MatCheckPreallocated(mat,1); 5145 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 5146 5147 #if defined(PETSC_USE_DEBUG) 5148 else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values"); 5149 #endif 5150 { 5151 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 5152 
PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 5153 PetscBool roworiented = aij->roworiented; 5154 5155 /* Some Variables required in the macro */ 5156 Mat A = aij->A; 5157 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 5158 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 5159 MatScalar *aa = a->a; 5160 PetscBool ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 5161 Mat B = aij->B; 5162 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 5163 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 5164 MatScalar *ba = b->a; 5165 5166 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 5167 PetscInt nonew = a->nonew; 5168 MatScalar *ap1,*ap2; 5169 5170 PetscFunctionBegin; 5171 for (i=0; i<m; i++) { 5172 if (im[i] < 0) continue; 5173 #if defined(PETSC_USE_DEBUG) 5174 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 5175 #endif 5176 if (im[i] >= rstart && im[i] < rend) { 5177 row = im[i] - rstart; 5178 lastcol1 = -1; 5179 rp1 = aj + ai[row]; 5180 ap1 = aa + ai[row]; 5181 rmax1 = aimax[row]; 5182 nrow1 = ailen[row]; 5183 low1 = 0; 5184 high1 = nrow1; 5185 lastcol2 = -1; 5186 rp2 = bj + bi[row]; 5187 ap2 = ba + bi[row]; 5188 rmax2 = bimax[row]; 5189 nrow2 = bilen[row]; 5190 low2 = 0; 5191 high2 = nrow2; 5192 5193 for (j=0; j<n; j++) { 5194 if (roworiented) value = v[i*n+j]; 5195 else value = v[i+j*m]; 5196 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES)) continue; 5197 if (in[j] >= cstart && in[j] < cend) { 5198 col = in[j] - cstart; 5199 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 5200 } else if (in[j] < 0) continue; 5201 #if defined(PETSC_USE_DEBUG) 5202 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 5203 #endif 5204 else { 5205 if (mat->was_assembled) { 5206 if (!aij->colmap) { 5207 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 5208 } 5209 #if defined(PETSC_USE_CTABLE) 5210 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 5211 col--; 5212 #else 5213 col = aij->colmap[in[j]] - 1; 5214 #endif 5215 if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 5216 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 5217 col = in[j]; 5218 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 5219 B = aij->B; 5220 b = (Mat_SeqAIJ*)B->data; 5221 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; 5222 rp2 = bj + bi[row]; 5223 ap2 = ba + bi[row]; 5224 rmax2 = bimax[row]; 5225 nrow2 = bilen[row]; 5226 low2 = 0; 5227 high2 = nrow2; 5228 bm = aij->B->rmap->n; 5229 ba = b->a; 5230 } 5231 } else col = in[j]; 5232 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 5233 } 5234 } 5235 } else if (!aij->donotstash) { 5236 if (roworiented) { 5237 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5238 } else { 5239 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 5240 } 5241 } 5242 } 5243 } 5244 PetscFunctionReturnVoid(); 5245 } 5246 5247
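/*
   Illustrative usage sketch for MatCreateMPIAIJSumSeqAIJ() (not part of the library code; the
   sequential matrix seq, its global size N, and the per-row preallocation nz are assumed to be
   provided by the caller, with identical dimensions of seq on every process):

      Mat seq,par;
      ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,N,N,nz,NULL,&seq);CHKERRQ(ierr);
      ... each process inserts its contribution with MatSetValues() and assembles seq ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&par);CHKERRQ(ierr);
      ... later, after changing the numerical values of seq with the same nonzero pattern ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,seq,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&par);CHKERRQ(ierr);
*/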