1 2 3 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 4 #include <petsc/private/vecimpl.h> 5 #include <petsc/private/vecscatterimpl.h> 6 #include <petsc/private/isimpl.h> 7 #include <petscblaslapack.h> 8 #include <petscsf.h> 9 10 /*MC 11 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 12 13 This matrix type is identical to MATSEQAIJ when constructed with a single process communicator, 14 and MATMPIAIJ otherwise. As a result, for single process communicators, 15 MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation() is supported 16 for communicators controlling multiple processes. It is recommended that you call both of 17 the above preallocation routines for simplicity. 18 19 Options Database Keys: 20 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions() 21 22 Developer Notes: 23 Subclasses include MATAIJCUSP, MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when 24 enough exist. 25 26 Level: beginner 27 28 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ 29 M*/ 30 31 /*MC 32 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 33 34 This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator, 35 and MATMPIAIJCRL otherwise. As a result, for single process communicators, 36 MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported 37 for communicators controlling multiple processes. It is recommended that you call both of 38 the above preallocation routines for simplicity. 39 40 Options Database Keys: 41 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions() 42 43 Level: beginner 44 45 .seealso: MatCreateMPIAIJCRL,MATSEQAIJCRL,MATMPIAIJCRL, MATSEQAIJCRL, MATMPIAIJCRL 46 M*/ 47 48 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 49 { 50 PetscErrorCode ierr; 51 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 52 53 PetscFunctionBegin; 54 if (mat->A) { 55 ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr); 56 ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr); 57 } 58 PetscFunctionReturn(0); 59 } 60 61 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows) 62 { 63 PetscErrorCode ierr; 64 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)M->data; 65 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data; 66 Mat_SeqAIJ *b = (Mat_SeqAIJ*)mat->B->data; 67 const PetscInt *ia,*ib; 68 const MatScalar *aa,*bb; 69 PetscInt na,nb,i,j,*rows,cnt=0,n0rows; 70 PetscInt m = M->rmap->n,rstart = M->rmap->rstart; 71 72 PetscFunctionBegin; 73 *keptrows = 0; 74 ia = a->i; 75 ib = b->i; 76 for (i=0; i<m; i++) { 77 na = ia[i+1] - ia[i]; 78 nb = ib[i+1] - ib[i]; 79 if (!na && !nb) { 80 cnt++; 81 goto ok1; 82 } 83 aa = a->a + ia[i]; 84 for (j=0; j<na; j++) { 85 if (aa[j] != 0.0) goto ok1; 86 } 87 bb = b->a + ib[i]; 88 for (j=0; j <nb; j++) { 89 if (bb[j] != 0.0) goto ok1; 90 } 91 cnt++; 92 ok1:; 93 } 94 ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr); 95 if (!n0rows) PetscFunctionReturn(0); 96 ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr); 97 cnt = 0; 98 for (i=0; i<m; i++) { 99 na = ia[i+1] - ia[i]; 100 nb = ib[i+1] - ib[i]; 101 if (!na && !nb) continue; 102 aa = a->a + ia[i]; 103 for (j=0; j<na;j++) { 104 if (aa[j] != 0.0) { 105 rows[cnt++] = rstart + i; 106 goto ok2; 107 } 108 } 109 bb = b->a + ib[i]; 110 for (j=0; j<nb; j++) { 111 if (bb[j] 
!= 0.0) { 112 rows[cnt++] = rstart + i; 113 goto ok2; 114 } 115 } 116 ok2:; 117 } 118 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr); 119 PetscFunctionReturn(0); 120 } 121 122 PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is) 123 { 124 PetscErrorCode ierr; 125 Mat_MPIAIJ *aij = (Mat_MPIAIJ*) Y->data; 126 PetscBool cong; 127 128 PetscFunctionBegin; 129 ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr); 130 if (Y->assembled && cong) { 131 ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr); 132 } else { 133 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 134 } 135 PetscFunctionReturn(0); 136 } 137 138 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows) 139 { 140 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)M->data; 141 PetscErrorCode ierr; 142 PetscInt i,rstart,nrows,*rows; 143 144 PetscFunctionBegin; 145 *zrows = NULL; 146 ierr = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr); 147 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 148 for (i=0; i<nrows; i++) rows[i] += rstart; 149 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr); 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms) 154 { 155 PetscErrorCode ierr; 156 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)A->data; 157 PetscInt i,n,*garray = aij->garray; 158 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ*) aij->A->data; 159 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ*) aij->B->data; 160 PetscReal *work; 161 162 PetscFunctionBegin; 163 ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr); 164 ierr = PetscCalloc1(n,&work);CHKERRQ(ierr); 165 if (type == NORM_2) { 166 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 167 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]); 168 } 169 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 170 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]); 171 } 172 } else if (type == NORM_1) { 173 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 174 work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 175 } 176 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 177 work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 178 } 179 } else if (type == NORM_INFINITY) { 180 for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) { 181 work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 182 } 183 for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) { 184 work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]); 185 } 186 187 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType"); 188 if (type == NORM_INFINITY) { 189 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 190 } else { 191 ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 192 } 193 ierr = PetscFree(work);CHKERRQ(ierr); 194 if (type == NORM_2) { 195 for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]); 196 } 197 PetscFunctionReturn(0); 198 } 199 200 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is) 201 { 202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 203 IS sis,gis; 204 PetscErrorCode ierr; 205 const PetscInt *isis,*igis; 206 PetscInt n,*iis,nsis,ngis,rstart,i; 207 208 PetscFunctionBegin; 209 ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr); 210 ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr); 211 
ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr); 212 ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr); 213 ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr); 214 ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr); 215 216 ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr); 217 ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr); 218 ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr); 219 n = ngis + nsis; 220 ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr); 221 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 222 for (i=0; i<n; i++) iis[i] += rstart; 223 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr); 224 225 ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr); 226 ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr); 227 ierr = ISDestroy(&sis);CHKERRQ(ierr); 228 ierr = ISDestroy(&gis);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 /* 233 Distributes a SeqAIJ matrix across a set of processes. Code stolen from 234 MatLoad_MPIAIJ(). Horrible lack of reuse. Should be a routine for each matrix type. 235 236 Only for square matrices 237 238 Used by a preconditioner, hence PETSC_EXTERN 239 */ 240 PETSC_EXTERN PetscErrorCode MatDistribute_MPIAIJ(MPI_Comm comm,Mat gmat,PetscInt m,MatReuse reuse,Mat *inmat) 241 { 242 PetscMPIInt rank,size; 243 PetscInt *rowners,*dlens,*olens,i,rstart,rend,j,jj,nz = 0,*gmataj,cnt,row,*ld,bses[2]; 244 PetscErrorCode ierr; 245 Mat mat; 246 Mat_SeqAIJ *gmata; 247 PetscMPIInt tag; 248 MPI_Status status; 249 PetscBool aij; 250 MatScalar *gmataa,*ao,*ad,*gmataarestore=0; 251 252 PetscFunctionBegin; 253 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 254 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 255 if (!rank) { 256 ierr = PetscObjectTypeCompare((PetscObject)gmat,MATSEQAIJ,&aij);CHKERRQ(ierr); 257 if (!aij) SETERRQ1(PetscObjectComm((PetscObject)gmat),PETSC_ERR_SUP,"Currently no support for input matrix of type %s\n",((PetscObject)gmat)->type_name); 258 } 259 if (reuse == MAT_INITIAL_MATRIX) { 260 ierr = MatCreate(comm,&mat);CHKERRQ(ierr); 261 ierr = MatSetSizes(mat,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 262 ierr = MatGetBlockSizes(gmat,&bses[0],&bses[1]);CHKERRQ(ierr); 263 ierr = MPI_Bcast(bses,2,MPIU_INT,0,comm);CHKERRQ(ierr); 264 ierr = MatSetBlockSizes(mat,bses[0],bses[1]);CHKERRQ(ierr); 265 ierr = MatSetType(mat,MATAIJ);CHKERRQ(ierr); 266 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 267 ierr = PetscMalloc2(m,&dlens,m,&olens);CHKERRQ(ierr); 268 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 269 270 rowners[0] = 0; 271 for (i=2; i<=size; i++) rowners[i] += rowners[i-1]; 272 rstart = rowners[rank]; 273 rend = rowners[rank+1]; 274 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 275 if (!rank) { 276 gmata = (Mat_SeqAIJ*) gmat->data; 277 /* send row lengths to all processors */ 278 for (i=0; i<m; i++) dlens[i] = gmata->ilen[i]; 279 for (i=1; i<size; i++) { 280 ierr = MPI_Send(gmata->ilen + rowners[i],rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 281 } 282 /* determine number diagonal and off-diagonal counts */ 283 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 284 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 285 jj = 0; 286 for (i=0; i<m; i++) { 287 for (j=0; j<dlens[i]; j++) { 288 if (gmata->j[jj] < rstart) ld[i]++; 289 if (gmata->j[jj] < rstart || gmata->j[jj] >= rend) olens[i]++; 290 jj++; 291 } 292 } 293 /* send column indices to other processes */ 294 for (i=1; i<size; i++) { 295 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 296 ierr = 
MPI_Send(&nz,1,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 297 ierr = MPI_Send(gmata->j + gmata->i[rowners[i]],nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 298 } 299 300 /* send numerical values to other processes */ 301 for (i=1; i<size; i++) { 302 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 303 ierr = MPI_Send(gmata->a + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 304 } 305 gmataa = gmata->a; 306 gmataj = gmata->j; 307 308 } else { 309 /* receive row lengths */ 310 ierr = MPI_Recv(dlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 311 /* receive column indices */ 312 ierr = MPI_Recv(&nz,1,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 313 ierr = PetscMalloc2(nz,&gmataa,nz,&gmataj);CHKERRQ(ierr); 314 ierr = MPI_Recv(gmataj,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr); 315 /* determine number diagonal and off-diagonal counts */ 316 ierr = PetscArrayzero(olens,m);CHKERRQ(ierr); 317 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 318 jj = 0; 319 for (i=0; i<m; i++) { 320 for (j=0; j<dlens[i]; j++) { 321 if (gmataj[jj] < rstart) ld[i]++; 322 if (gmataj[jj] < rstart || gmataj[jj] >= rend) olens[i]++; 323 jj++; 324 } 325 } 326 /* receive numerical values */ 327 ierr = PetscArrayzero(gmataa,nz);CHKERRQ(ierr); 328 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 329 } 330 /* set preallocation */ 331 for (i=0; i<m; i++) { 332 dlens[i] -= olens[i]; 333 } 334 ierr = MatSeqAIJSetPreallocation(mat,0,dlens);CHKERRQ(ierr); 335 ierr = MatMPIAIJSetPreallocation(mat,0,dlens,0,olens);CHKERRQ(ierr); 336 337 for (i=0; i<m; i++) { 338 dlens[i] += olens[i]; 339 } 340 cnt = 0; 341 for (i=0; i<m; i++) { 342 row = rstart + i; 343 ierr = MatSetValues(mat,1,&row,dlens[i],gmataj+cnt,gmataa+cnt,INSERT_VALUES);CHKERRQ(ierr); 344 cnt += dlens[i]; 345 } 346 if (rank) { 347 ierr = PetscFree2(gmataa,gmataj);CHKERRQ(ierr); 348 } 349 ierr = PetscFree2(dlens,olens);CHKERRQ(ierr); 350 ierr = PetscFree(rowners);CHKERRQ(ierr); 351 352 ((Mat_MPIAIJ*)(mat->data))->ld = ld; 353 354 *inmat = mat; 355 } else { /* column indices are already set; only need to move over numerical values from process 0 */ 356 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->A->data; 357 Mat_SeqAIJ *Ao = (Mat_SeqAIJ*)((Mat_MPIAIJ*)((*inmat)->data))->B->data; 358 mat = *inmat; 359 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag);CHKERRQ(ierr); 360 if (!rank) { 361 /* send numerical values to other processes */ 362 gmata = (Mat_SeqAIJ*) gmat->data; 363 ierr = MatGetOwnershipRanges(mat,(const PetscInt**)&rowners);CHKERRQ(ierr); 364 gmataa = gmata->a; 365 for (i=1; i<size; i++) { 366 nz = gmata->i[rowners[i+1]]-gmata->i[rowners[i]]; 367 ierr = MPI_Send(gmataa + gmata->i[rowners[i]],nz,MPIU_SCALAR,i,tag,comm);CHKERRQ(ierr); 368 } 369 nz = gmata->i[rowners[1]]-gmata->i[rowners[0]]; 370 } else { 371 /* receive numerical values from process 0*/ 372 nz = Ad->nz + Ao->nz; 373 ierr = PetscMalloc1(nz,&gmataa);CHKERRQ(ierr); gmataarestore = gmataa; 374 ierr = MPI_Recv(gmataa,nz,MPIU_SCALAR,0,tag,comm,&status);CHKERRQ(ierr); 375 } 376 /* transfer numerical values into the diagonal A and off diagonal B parts of mat */ 377 ld = ((Mat_MPIAIJ*)(mat->data))->ld; 378 ad = Ad->a; 379 ao = Ao->a; 380 if (mat->rmap->n) { 381 i = 0; 382 nz = ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 383 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 384 } 385 for (i=1; i<mat->rmap->n; i++) { 386 nz = Ao->i[i] - Ao->i[i-1] - ld[i-1] + ld[i]; ierr = 
PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); ao += nz; gmataa += nz; 387 nz = Ad->i[i+1] - Ad->i[i]; ierr = PetscArraycpy(ad,gmataa,nz);CHKERRQ(ierr); ad += nz; gmataa += nz; 388 } 389 i--; 390 if (mat->rmap->n) { 391 nz = Ao->i[i+1] - Ao->i[i] - ld[i]; ierr = PetscArraycpy(ao,gmataa,nz);CHKERRQ(ierr); 392 } 393 if (rank) { 394 ierr = PetscFree(gmataarestore);CHKERRQ(ierr); 395 } 396 } 397 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 398 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 399 PetscFunctionReturn(0); 400 } 401 402 /* 403 Local utility routine that creates a mapping from the global column 404 number to the local number in the off-diagonal part of the local 405 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 406 a slightly higher hash table cost; without it it is not scalable (each processor 407 has an order N integer array but is fast to acess. 408 */ 409 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 410 { 411 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 412 PetscErrorCode ierr; 413 PetscInt n = aij->B->cmap->n,i; 414 415 PetscFunctionBegin; 416 if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray"); 417 #if defined(PETSC_USE_CTABLE) 418 ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 419 for (i=0; i<n; i++) { 420 ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 421 } 422 #else 423 ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr); 424 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr); 425 for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1; 426 #endif 427 PetscFunctionReturn(0); 428 } 429 430 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol) \ 431 { \ 432 if (col <= lastcol1) low1 = 0; \ 433 else high1 = nrow1; \ 434 lastcol1 = col;\ 435 while (high1-low1 > 5) { \ 436 t = (low1+high1)/2; \ 437 if (rp1[t] > col) high1 = t; \ 438 else low1 = t; \ 439 } \ 440 for (_i=low1; _i<high1; _i++) { \ 441 if (rp1[_i] > col) break; \ 442 if (rp1[_i] == col) { \ 443 if (addv == ADD_VALUES) { \ 444 ap1[_i] += value; \ 445 /* Not sure LogFlops will slow dow the code or not */ \ 446 (void)PetscLogFlops(1.0); \ 447 } \ 448 else ap1[_i] = value; \ 449 goto a_noinsert; \ 450 } \ 451 } \ 452 if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \ 453 if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;} \ 454 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 455 MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \ 456 N = nrow1++ - 1; a->nz++; high1++; \ 457 /* shift up all the later entries in this row */ \ 458 ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\ 459 ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\ 460 rp1[_i] = col; \ 461 ap1[_i] = value; \ 462 A->nonzerostate++;\ 463 a_noinsert: ; \ 464 ailen[row] = nrow1; \ 465 } 466 467 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \ 468 { \ 469 if (col <= lastcol2) low2 = 0; \ 470 else high2 = nrow2; \ 471 lastcol2 = col; \ 472 while (high2-low2 > 5) { \ 473 t = (low2+high2)/2; \ 474 if (rp2[t] > col) high2 = t; \ 475 else low2 = t; \ 476 } \ 477 for (_i=low2; _i<high2; _i++) { \ 478 if (rp2[_i] > col) break; \ 479 if (rp2[_i] == col) { \ 480 if (addv == ADD_VALUES) { 
\ 481 ap2[_i] += value; \ 482 (void)PetscLogFlops(1.0); \ 483 } \ 484 else ap2[_i] = value; \ 485 goto b_noinsert; \ 486 } \ 487 } \ 488 if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 489 if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;} \ 490 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \ 491 MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \ 492 N = nrow2++ - 1; b->nz++; high2++; \ 493 /* shift up all the later entries in this row */ \ 494 ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\ 495 ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\ 496 rp2[_i] = col; \ 497 ap2[_i] = value; \ 498 B->nonzerostate++; \ 499 b_noinsert: ; \ 500 bilen[row] = nrow2; \ 501 } 502 503 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[]) 504 { 505 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data; 506 Mat_SeqAIJ *a = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data; 507 PetscErrorCode ierr; 508 PetscInt l,*garray = mat->garray,diag; 509 510 PetscFunctionBegin; 511 /* code only works for square matrices A */ 512 513 /* find size of row to the left of the diagonal part */ 514 ierr = MatGetOwnershipRange(A,&diag,0);CHKERRQ(ierr); 515 row = row - diag; 516 for (l=0; l<b->i[row+1]-b->i[row]; l++) { 517 if (garray[b->j[b->i[row]+l]] > diag) break; 518 } 519 ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr); 520 521 /* diagonal part */ 522 ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr); 523 524 /* right of diagonal part */ 525 ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr); 526 PetscFunctionReturn(0); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 532 PetscScalar value = 0.0; 533 PetscErrorCode ierr; 534 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 535 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 536 PetscBool roworiented = aij->roworiented; 537 538 /* Some Variables required in the macro */ 539 Mat A = aij->A; 540 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 541 PetscInt *aimax = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j; 542 MatScalar *aa = a->a; 543 PetscBool ignorezeroentries = a->ignorezeroentries; 544 Mat B = aij->B; 545 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 546 PetscInt *bimax = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n; 547 MatScalar *ba = b->a; 548 549 PetscInt *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2; 550 PetscInt nonew; 551 MatScalar *ap1,*ap2; 552 553 PetscFunctionBegin; 554 for (i=0; i<m; i++) { 555 if (im[i] < 0) continue; 556 #if defined(PETSC_USE_DEBUG) 557 if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1); 558 #endif 559 if (im[i] >= rstart && im[i] < rend) { 560 row = im[i] - rstart; 561 lastcol1 = -1; 562 rp1 = aj + ai[row]; 563 ap1 = aa + ai[row]; 564 rmax1 = aimax[row]; 565 nrow1 = ailen[row]; 566 low1 = 0; 567 high1 = nrow1; 568 lastcol2 = -1; 569 rp2 = bj + bi[row]; 570 ap2 = ba + bi[row]; 571 rmax2 = bimax[row]; 572 nrow2 = bilen[row]; 573 low2 = 0; 574 high2 = nrow2; 575 576 for (j=0; j<n; 
j++) { 577 if (v) value = roworiented ? v[i*n+j] : v[i+j*m]; 578 if (in[j] >= cstart && in[j] < cend) { 579 col = in[j] - cstart; 580 nonew = a->nonew; 581 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue; 582 MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]); 583 } else if (in[j] < 0) continue; 584 #if defined(PETSC_USE_DEBUG) 585 else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1); 586 #endif 587 else { 588 if (mat->was_assembled) { 589 if (!aij->colmap) { 590 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 591 } 592 #if defined(PETSC_USE_CTABLE) 593 ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr); 594 col--; 595 #else 596 col = aij->colmap[in[j]] - 1; 597 #endif 598 if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) { 599 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 600 col = in[j]; 601 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 602 B = aij->B; 603 b = (Mat_SeqAIJ*)B->data; 604 bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a; 605 rp2 = bj + bi[row]; 606 ap2 = ba + bi[row]; 607 rmax2 = bimax[row]; 608 nrow2 = bilen[row]; 609 low2 = 0; 610 high2 = nrow2; 611 bm = aij->B->rmap->n; 612 ba = b->a; 613 } else if (col < 0) { 614 if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) { 615 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr); 616 } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]); 617 } 618 } else col = in[j]; 619 nonew = b->nonew; 620 MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]); 621 } 622 } 623 } else { 624 if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]); 625 if (!aij->donotstash) { 626 mat->assembled = PETSC_FALSE; 627 if (roworiented) { 628 ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 629 } else { 630 ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr); 631 } 632 } 633 } 634 } 635 PetscFunctionReturn(0); 636 } 637 638 /* 639 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 640 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 641 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
642 */ 643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[]) 644 { 645 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 646 Mat A = aij->A; /* diagonal part of the matrix */ 647 Mat B = aij->B; /* offdiagonal part of the matrix */ 648 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 649 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 650 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,col; 651 PetscInt *ailen = a->ilen,*aj = a->j; 652 PetscInt *bilen = b->ilen,*bj = b->j; 653 PetscInt am = aij->A->rmap->n,j; 654 PetscInt diag_so_far = 0,dnz; 655 PetscInt offd_so_far = 0,onz; 656 657 PetscFunctionBegin; 658 /* Iterate over all rows of the matrix */ 659 for (j=0; j<am; j++) { 660 dnz = onz = 0; 661 /* Iterate over all non-zero columns of the current row */ 662 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 663 /* If column is in the diagonal */ 664 if (mat_j[col] >= cstart && mat_j[col] < cend) { 665 aj[diag_so_far++] = mat_j[col] - cstart; 666 dnz++; 667 } else { /* off-diagonal entries */ 668 bj[offd_so_far++] = mat_j[col]; 669 onz++; 670 } 671 } 672 ailen[j] = dnz; 673 bilen[j] = onz; 674 } 675 PetscFunctionReturn(0); 676 } 677 678 /* 679 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 680 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* offdiagonal part of the matrix */ 690 Mat_SeqAIJ *aijd =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ*)B->data; 693 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen,*aj = a->j; 695 PetscInt *bilen = b->ilen,*bj = b->j; 696 PetscInt am = aij->A->rmap->n,j; 697 PetscInt *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col,dnz_row,onz_row,rowstart_diag,rowstart_offd; 699 PetscScalar *aa = a->a,*ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j=0; j<am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col=mat_i[j]; col<mat_i[j+1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag+dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd+onz_row] = mat_j[col]; 716 ba[rowstart_offd+onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(0); 724 } 725 726 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 729 PetscErrorCode ierr; 730 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend; 731 PetscInt cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col; 732 733 PetscFunctionBegin; 734 for (i=0; i<m; i++) { 735 if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/ 736 if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1); 737 if (idxm[i] >= rstart && idxm[i] < rend) { 738 row = idxm[i] - rstart; 739 for (j=0; j<n; j++) { 740 if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */ 741 if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1); 742 if (idxn[j] >= cstart && idxn[j] < cend) { 743 col = idxn[j] - cstart; 744 ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 745 } else { 746 if (!aij->colmap) { 747 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr); 748 } 749 #if defined(PETSC_USE_CTABLE) 750 ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr); 751 col--; 752 #else 753 col = aij->colmap[idxn[j]] - 1; 754 #endif 755 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0; 756 else { 757 ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr); 758 } 759 } 760 } 761 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 762 } 763 PetscFunctionReturn(0); 764 } 765 766 extern PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat,Vec,Vec); 767 768 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode) 769 { 770 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 771 PetscErrorCode ierr; 772 PetscInt nstash,reallocs; 773 774 PetscFunctionBegin; 775 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 776 777 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 778 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 779 ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 780 PetscFunctionReturn(0); 781 } 782 783 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode) 784 { 785 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 786 Mat_SeqAIJ *a = (Mat_SeqAIJ*)aij->A->data; 787 PetscErrorCode ierr; 788 PetscMPIInt n; 789 PetscInt i,j,rstart,ncols,flg; 790 PetscInt *row,*col; 791 
PetscBool other_disassembled; 792 PetscScalar *val; 793 794 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 795 796 PetscFunctionBegin; 797 if (!aij->donotstash && !mat->nooffprocentries) { 798 while (1) { 799 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 800 if (!flg) break; 801 802 for (i=0; i<n; ) { 803 /* Now identify the consecutive vals belonging to the same row */ 804 for (j=i,rstart=row[j]; j<n; j++) { 805 if (row[j] != rstart) break; 806 } 807 if (j < n) ncols = j-i; 808 else ncols = n-i; 809 /* Now assemble all these values with a single function call */ 810 ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 811 812 i = j; 813 } 814 } 815 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 816 } 817 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 818 if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU) aij->A->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 819 #endif 820 ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr); 821 ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr); 822 823 /* determine if any processor has disassembled, if so we must 824 also disassemble ourself, in order that we may reassemble. */ 825 /* 826 if nonzero structure of submatrix B cannot change then we know that 827 no processor disassembled thus we can skip this stuff 828 */ 829 if (!((Mat_SeqAIJ*)aij->B->data)->nonew) { 830 ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 831 if (mat->was_assembled && !other_disassembled) { 832 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 833 aij->B->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */ 834 #endif 835 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr); 836 } 837 } 838 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 839 ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr); 840 } 841 ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr); 842 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 843 if (mat->valid_GPU_matrix == PETSC_OFFLOAD_CPU && aij->B->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) aij->B->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 844 #endif 845 ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr); 846 ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr); 847 848 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 849 850 aij->rowvalues = 0; 851 852 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 853 if (a->inode.size) mat->ops->multdiagonalblock = MatMultDiagonalBlock_MPIAIJ; 854 855 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 856 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) { 857 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 858 ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 859 } 860 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 861 mat->valid_GPU_matrix = PETSC_OFFLOAD_BOTH; 862 #endif 863 PetscFunctionReturn(0); 864 } 865 866 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 867 { 868 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 869 PetscErrorCode ierr; 870 871 PetscFunctionBegin; 872 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 873 ierr = MatZeroEntries(l->B);CHKERRQ(ierr); 874 PetscFunctionReturn(0); 875 } 876 877 
PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 878 { 879 Mat_MPIAIJ *mat = (Mat_MPIAIJ *) A->data; 880 PetscObjectState sA, sB; 881 PetscInt *lrows; 882 PetscInt r, len; 883 PetscBool cong, lch, gch; 884 PetscErrorCode ierr; 885 886 PetscFunctionBegin; 887 /* get locally owned rows */ 888 ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr); 889 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 890 /* fix right hand side if needed */ 891 if (x && b) { 892 const PetscScalar *xx; 893 PetscScalar *bb; 894 895 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 896 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 897 ierr = VecGetArray(b, &bb);CHKERRQ(ierr); 898 for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]]; 899 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 900 ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr); 901 } 902 903 sA = mat->A->nonzerostate; 904 sB = mat->B->nonzerostate; 905 906 if (diag != 0.0 && cong) { 907 ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr); 908 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 909 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 910 Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data; 911 Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data; 912 PetscInt nnwA, nnwB; 913 PetscBool nnzA, nnzB; 914 915 nnwA = aijA->nonew; 916 nnwB = aijB->nonew; 917 nnzA = aijA->keepnonzeropattern; 918 nnzB = aijB->keepnonzeropattern; 919 if (!nnzA) { 920 ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr); 921 aijA->nonew = 0; 922 } 923 if (!nnzB) { 924 ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr); 925 aijB->nonew = 0; 926 } 927 /* Must zero here before the next loop */ 928 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 929 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 930 for (r = 0; r < len; ++r) { 931 const PetscInt row = lrows[r] + A->rmap->rstart; 932 if (row >= A->cmap->N) continue; 933 ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr); 934 } 935 aijA->nonew = nnwA; 936 aijB->nonew = nnwB; 937 } else { 938 ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 939 ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr); 940 } 941 ierr = PetscFree(lrows);CHKERRQ(ierr); 942 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 943 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 944 945 /* reduce nonzerostate */ 946 lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate); 947 ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 948 if (gch) A->nonzerostate++; 949 PetscFunctionReturn(0); 950 } 951 952 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 953 { 954 Mat_MPIAIJ *l = (Mat_MPIAIJ*)A->data; 955 PetscErrorCode ierr; 956 PetscMPIInt n = A->rmap->n; 957 PetscInt i,j,r,m,p = 0,len = 0; 958 PetscInt *lrows,*owners = A->rmap->range; 959 PetscSFNode *rrows; 960 PetscSF sf; 961 const PetscScalar *xx; 962 PetscScalar *bb,*mask; 963 Vec xmask,lmask; 964 Mat_SeqAIJ *aij = 
(Mat_SeqAIJ*)l->B->data; 965 const PetscInt *aj, *ii,*ridx; 966 PetscScalar *aa; 967 968 PetscFunctionBegin; 969 /* Create SF where leaves are input rows and roots are owned rows */ 970 ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr); 971 for (r = 0; r < n; ++r) lrows[r] = -1; 972 ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr); 973 for (r = 0; r < N; ++r) { 974 const PetscInt idx = rows[r]; 975 if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N); 976 if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */ 977 ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr); 978 } 979 rrows[r].rank = p; 980 rrows[r].index = rows[r] - owners[p]; 981 } 982 ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr); 983 ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr); 984 /* Collect flags for rows to be zeroed */ 985 ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 986 ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr); 987 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 988 /* Compress and put in row numbers */ 989 for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r; 990 /* zero diagonal part of matrix */ 991 ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr); 992 /* handle off diagonal part of matrix */ 993 ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr); 994 ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr); 995 ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr); 996 for (i=0; i<len; i++) bb[lrows[i]] = 1; 997 ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr); 998 ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = VecDestroy(&xmask);CHKERRQ(ierr); 1001 if (x && b) { /* this code is buggy when the row and column layout don't match */ 1002 PetscBool cong; 1003 1004 ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr); 1005 if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout"); 1006 ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1007 ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1009 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 1010 } 1011 ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr); 1012 /* remove zeroed rows of off diagonal matrix */ 1013 ii = aij->i; 1014 for (i=0; i<len; i++) { 1015 ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr); 1016 } 1017 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1018 if (aij->compressedrow.use) { 1019 m = aij->compressedrow.nrows; 1020 ii = aij->compressedrow.i; 1021 ridx = aij->compressedrow.rindex; 1022 for (i=0; i<m; i++) { 1023 n = ii[i+1] - ii[i]; 1024 aj = aij->j + ii[i]; 1025 aa = aij->a + ii[i]; 1026 1027 for (j=0; j<n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[*ridx] -= *aa*xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 ridx++; 1036 } 1037 } else { /* do not use compressed row format */ 1038 m = l->B->rmap->n; 1039 for (i=0; i<m; i++) { 1040 n = ii[i+1] - ii[i]; 1041 aj = aij->j + ii[i]; 1042 aa = aij->a + ii[i]; 1043 for (j=0; j<n; j++) { 1044 if 
(PetscAbsScalar(mask[*aj])) { 1045 if (b) bb[i] -= *aa*xx[*aj]; 1046 *aa = 0.0; 1047 } 1048 aa++; 1049 aj++; 1050 } 1051 } 1052 } 1053 if (x && b) { 1054 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 1055 ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr); 1056 } 1057 ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr); 1058 ierr = VecDestroy(&lmask);CHKERRQ(ierr); 1059 ierr = PetscFree(lrows);CHKERRQ(ierr); 1060 1061 /* only change matrix nonzero state if pattern was allowed to be changed */ 1062 if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) { 1063 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1064 ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1065 } 1066 PetscFunctionReturn(0); 1067 } 1068 1069 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy) 1070 { 1071 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1072 PetscErrorCode ierr; 1073 PetscInt nt; 1074 VecScatter Mvctx = a->Mvctx; 1075 1076 PetscFunctionBegin; 1077 ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr); 1078 if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt); 1079 1080 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1081 ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr); 1082 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1083 ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr); 1084 PetscFunctionReturn(0); 1085 } 1086 1087 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx) 1088 { 1089 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1090 PetscErrorCode ierr; 1091 1092 PetscFunctionBegin; 1093 ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr); 1094 PetscFunctionReturn(0); 1095 } 1096 1097 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1098 { 1099 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1100 PetscErrorCode ierr; 1101 VecScatter Mvctx = a->Mvctx; 1102 1103 PetscFunctionBegin; 1104 if (a->Mvctx_mpi1_flg) Mvctx = a->Mvctx_mpi1; 1105 ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1106 ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1107 ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr); 1109 PetscFunctionReturn(0); 1110 } 1111 1112 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy) 1113 { 1114 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1115 PetscErrorCode ierr; 1116 1117 PetscFunctionBegin; 1118 /* do nondiagonal part */ 1119 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1120 /* do local part */ 1121 ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr); 1122 /* add partial results together */ 1123 ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1124 ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1125 PetscFunctionReturn(0); 1126 } 1127 1128 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool *f) 1129 { 1130 MPI_Comm comm; 1131 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*) Amat->data, *Bij; 1132 Mat Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs; 1133 IS Me,Notme; 1134 PetscErrorCode ierr; 1135 PetscInt M,N,first,last,*notme,i; 1136 PetscBool lf; 1137 PetscMPIInt size; 1138 1139 PetscFunctionBegin; 1140 /* Easy test: symmetric diagonal block */ 1141 Bij = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A; 1142 ierr = 
MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr); 1143 ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr); 1144 if (!*f) PetscFunctionReturn(0); 1145 ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr); 1146 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 1147 if (size == 1) PetscFunctionReturn(0); 1148 1149 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1150 ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr); 1151 ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr); 1152 ierr = PetscMalloc1(N-last+first,¬me);CHKERRQ(ierr); 1153 for (i=0; i<first; i++) notme[i] = i; 1154 for (i=last; i<M; i++) notme[i-last+first] = i; 1155 ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr); 1156 ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr); 1157 ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr); 1158 Aoff = Aoffs[0]; 1159 ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr); 1160 Boff = Boffs[0]; 1161 ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr); 1162 ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr); 1163 ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr); 1164 ierr = ISDestroy(&Me);CHKERRQ(ierr); 1165 ierr = ISDestroy(&Notme);CHKERRQ(ierr); 1166 ierr = PetscFree(notme);CHKERRQ(ierr); 1167 PetscFunctionReturn(0); 1168 } 1169 1170 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool *f) 1171 { 1172 PetscErrorCode ierr; 1173 1174 PetscFunctionBegin; 1175 ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr); 1176 PetscFunctionReturn(0); 1177 } 1178 1179 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1180 { 1181 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1182 PetscErrorCode ierr; 1183 1184 PetscFunctionBegin; 1185 /* do nondiagonal part */ 1186 ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr); 1187 /* do local part */ 1188 ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr); 1189 /* add partial results together */ 1190 ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1191 ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1192 PetscFunctionReturn(0); 1193 } 1194 1195 /* 1196 This only works correctly for square matrices where the subblock A->A is the 1197 diagonal block 1198 */ 1199 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v) 1200 { 1201 PetscErrorCode ierr; 1202 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1203 1204 PetscFunctionBegin; 1205 if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); 1206 if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition"); 1207 ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr); 1208 PetscFunctionReturn(0); 1209 } 1210 1211 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa) 1212 { 1213 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1214 PetscErrorCode ierr; 1215 1216 PetscFunctionBegin; 1217 ierr = MatScale(a->A,aa);CHKERRQ(ierr); 1218 ierr = MatScale(a->B,aa);CHKERRQ(ierr); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 1223 { 1224 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1225 PetscErrorCode ierr; 1226 1227 PetscFunctionBegin; 1228 #if defined(PETSC_USE_LOG) 1229 
PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 1230 #endif 1231 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 1232 ierr = VecDestroy(&aij->diag);CHKERRQ(ierr); 1233 ierr = MatDestroy(&aij->A);CHKERRQ(ierr); 1234 ierr = MatDestroy(&aij->B);CHKERRQ(ierr); 1235 #if defined(PETSC_USE_CTABLE) 1236 ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr); 1237 #else 1238 ierr = PetscFree(aij->colmap);CHKERRQ(ierr); 1239 #endif 1240 ierr = PetscFree(aij->garray);CHKERRQ(ierr); 1241 ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr); 1242 ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr); 1243 if (aij->Mvctx_mpi1) {ierr = VecScatterDestroy(&aij->Mvctx_mpi1);CHKERRQ(ierr);} 1244 ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr); 1245 ierr = PetscFree(aij->ld);CHKERRQ(ierr); 1246 ierr = PetscFree(mat->data);CHKERRQ(ierr); 1247 1248 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1252 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1253 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr); 1256 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr); 1257 #if defined(PETSC_HAVE_ELEMENTAL) 1258 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr); 1259 #endif 1260 #if defined(PETSC_HAVE_HYPRE) 1261 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1262 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMatMatMult_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr); 1263 #endif 1264 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr); 1265 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpiaij_C",NULL);CHKERRQ(ierr); 1266 PetscFunctionReturn(0); 1267 } 1268 1269 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer) 1270 { 1271 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1272 Mat_SeqAIJ *A = (Mat_SeqAIJ*)aij->A->data; 1273 Mat_SeqAIJ *B = (Mat_SeqAIJ*)aij->B->data; 1274 PetscErrorCode ierr; 1275 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 1276 int fd; 1277 PetscInt nz,header[4],*row_lengths,*range=0,rlen,i; 1278 PetscInt nzmax,*column_indices,j,k,col,*garray = aij->garray,cnt,cstart = mat->cmap->rstart,rnz = 0; 1279 PetscScalar *column_values; 1280 PetscInt message_count,flowcontrolcount; 1281 FILE *file; 1282 1283 PetscFunctionBegin; 1284 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1285 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 1286 nz = A->nz + B->nz; 1287 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1288 if (!rank) { 1289 header[0] = MAT_FILE_CLASSID; 1290 header[1] = mat->rmap->N; 1291 header[2] = mat->cmap->N; 1292 1293 ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1294 
ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1295 /* get largest number of rows any processor has */ 1296 rlen = mat->rmap->n; 1297 range = mat->rmap->range; 1298 for (i=1; i<size; i++) rlen = PetscMax(rlen,range[i+1] - range[i]); 1299 } else { 1300 ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1301 rlen = mat->rmap->n; 1302 } 1303 1304 /* load up the local row counts */ 1305 ierr = PetscMalloc1(rlen+1,&row_lengths);CHKERRQ(ierr); 1306 for (i=0; i<mat->rmap->n; i++) row_lengths[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]; 1307 1308 /* store the row lengths to the file */ 1309 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1310 if (!rank) { 1311 ierr = PetscBinaryWrite(fd,row_lengths,mat->rmap->n,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1312 for (i=1; i<size; i++) { 1313 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1314 rlen = range[i+1] - range[i]; 1315 ierr = MPIULong_Recv(row_lengths,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1316 ierr = PetscBinaryWrite(fd,row_lengths,rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1317 } 1318 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1319 } else { 1320 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1321 ierr = MPIULong_Send(row_lengths,mat->rmap->n,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1322 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1323 } 1324 ierr = PetscFree(row_lengths);CHKERRQ(ierr); 1325 1326 /* load up the local column indices */ 1327 nzmax = nz; /* th processor needs space a largest processor needs */ 1328 ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1329 ierr = PetscMalloc1(nzmax+1,&column_indices);CHKERRQ(ierr); 1330 cnt = 0; 1331 for (i=0; i<mat->rmap->n; i++) { 1332 for (j=B->i[i]; j<B->i[i+1]; j++) { 1333 if ((col = garray[B->j[j]]) > cstart) break; 1334 column_indices[cnt++] = col; 1335 } 1336 for (k=A->i[i]; k<A->i[i+1]; k++) column_indices[cnt++] = A->j[k] + cstart; 1337 for (; j<B->i[i+1]; j++) column_indices[cnt++] = garray[B->j[j]]; 1338 } 1339 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1340 1341 /* store the column indices to the file */ 1342 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1343 if (!rank) { 1344 MPI_Status status; 1345 ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1346 for (i=1; i<size; i++) { 1347 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1348 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1349 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1350 ierr = MPIULong_Recv(column_indices,rnz,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1351 ierr = PetscBinaryWrite(fd,column_indices,rnz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1352 } 1353 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1354 } else { 1355 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1356 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1357 ierr 
= MPIULong_Send(column_indices,nz,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1358 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1359 } 1360 ierr = PetscFree(column_indices);CHKERRQ(ierr); 1361 1362 /* load up the local column values */ 1363 ierr = PetscMalloc1(nzmax+1,&column_values);CHKERRQ(ierr); 1364 cnt = 0; 1365 for (i=0; i<mat->rmap->n; i++) { 1366 for (j=B->i[i]; j<B->i[i+1]; j++) { 1367 if (garray[B->j[j]] > cstart) break; 1368 column_values[cnt++] = B->a[j]; 1369 } 1370 for (k=A->i[i]; k<A->i[i+1]; k++) column_values[cnt++] = A->a[k]; 1371 for (; j<B->i[i+1]; j++) column_values[cnt++] = B->a[j]; 1372 } 1373 if (cnt != A->nz + B->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,A->nz+B->nz); 1374 1375 /* store the column values to the file */ 1376 ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr); 1377 if (!rank) { 1378 MPI_Status status; 1379 ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1380 for (i=1; i<size; i++) { 1381 ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr); 1382 ierr = MPI_Recv(&rnz,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr); 1383 if (rnz > nzmax) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: nz = %D nzmax = %D",nz,nzmax); 1384 ierr = MPIULong_Recv(column_values,rnz,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1385 ierr = PetscBinaryWrite(fd,column_values,rnz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr); 1386 } 1387 ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr); 1388 } else { 1389 ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr); 1390 ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1391 ierr = MPIULong_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1392 ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr); 1393 } 1394 ierr = PetscFree(column_values);CHKERRQ(ierr); 1395 1396 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1397 if (file) fprintf(file,"-matload_block_size %d\n",(int)PetscAbs(mat->rmap->bs)); 1398 PetscFunctionReturn(0); 1399 } 1400 1401 #include <petscdraw.h> 1402 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 1403 { 1404 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1405 PetscErrorCode ierr; 1406 PetscMPIInt rank = aij->rank,size = aij->size; 1407 PetscBool isdraw,iascii,isbinary; 1408 PetscViewer sviewer; 1409 PetscViewerFormat format; 1410 1411 PetscFunctionBegin; 1412 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1413 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1414 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1415 if (iascii) { 1416 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1417 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1418 PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz; 1419 ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr); 1420 ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1421 for (i=0; i<(PetscInt)size; i++) { 1422 nmax = PetscMax(nmax,nz[i]); 1423 nmin = 
PetscMin(nmin,nz[i]); 1424 navg += nz[i]; 1425 } 1426 ierr = PetscFree(nz);CHKERRQ(ierr); 1427 navg = navg/size; 1428 ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D avg %D max %D\n",nmin,navg,nmax);CHKERRQ(ierr); 1429 PetscFunctionReturn(0); 1430 } 1431 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1432 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1433 MatInfo info; 1434 PetscBool inodes; 1435 1436 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 1437 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 1438 ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr); 1439 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 1440 if (!inodes) { 1441 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n", 1442 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1443 } else { 1444 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n", 1445 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr); 1446 } 1447 ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr); 1448 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1449 ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr); 1450 ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr); 1451 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1452 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 1453 ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr); 1454 ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr); 1455 PetscFunctionReturn(0); 1456 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1457 PetscInt inodecount,inodelimit,*inodes; 1458 ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr); 1459 if (inodes) { 1460 ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr); 1461 } else { 1462 ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr); 1463 } 1464 PetscFunctionReturn(0); 1465 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1466 PetscFunctionReturn(0); 1467 } 1468 } else if (isbinary) { 1469 if (size == 1) { 1470 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1471 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1472 } else { 1473 ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr); 1474 } 1475 PetscFunctionReturn(0); 1476 } else if (iascii && size == 1) { 1477 ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr); 1478 ierr = MatView(aij->A,viewer);CHKERRQ(ierr); 1479 PetscFunctionReturn(0); 1480 } else if (isdraw) { 1481 PetscDraw draw; 1482 PetscBool isnull; 1483 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1484 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1485 if (isnull) PetscFunctionReturn(0); 1486 } 1487 1488 { /* assemble the entire matrix onto first processor */ 1489 Mat A = NULL, Av; 1490 IS isrow,iscol; 1491 1492 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? 
mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1493 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1494 ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1495 ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr); 1496 /* The commented code uses MatCreateSubMatrices instead */ 1497 /* 1498 Mat *AA, A = NULL, Av; 1499 IS isrow,iscol; 1500 1501 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr); 1502 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr); 1503 ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr); 1504 if (!rank) { 1505 ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr); 1506 A = AA[0]; 1507 Av = AA[0]; 1508 } 1509 ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr); 1510 */ 1511 ierr = ISDestroy(&iscol);CHKERRQ(ierr); 1512 ierr = ISDestroy(&isrow);CHKERRQ(ierr); 1513 /* 1514 Everyone has to call to draw the matrix since the graphics waits are 1515 synchronized across all processors that share the PetscDraw object 1516 */ 1517 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1518 if (!rank) { 1519 if (((PetscObject)mat)->name) { 1520 ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr); 1521 } 1522 ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr); 1523 } 1524 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 1525 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1526 ierr = MatDestroy(&A);CHKERRQ(ierr); 1527 } 1528 PetscFunctionReturn(0); 1529 } 1530 1531 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer) 1532 { 1533 PetscErrorCode ierr; 1534 PetscBool iascii,isdraw,issocket,isbinary; 1535 1536 PetscFunctionBegin; 1537 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1538 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1539 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1540 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 1541 if (iascii || isdraw || isbinary || issocket) { 1542 ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 1543 } 1544 PetscFunctionReturn(0); 1545 } 1546 1547 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 1548 { 1549 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1550 PetscErrorCode ierr; 1551 Vec bb1 = 0; 1552 PetscBool hasop; 1553 1554 PetscFunctionBegin; 1555 if (flag == SOR_APPLY_UPPER) { 1556 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1557 PetscFunctionReturn(0); 1558 } 1559 1560 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { 1561 ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr); 1562 } 1563 1564 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1565 if (flag & SOR_ZERO_INITIAL_GUESS) { 1566 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1567 its--; 1568 } 1569 1570 while (its--) { 1571 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1573 1574 /* update rhs: bb1 = bb - B*x */ 1575 ierr = 
VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1576 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1577 1578 /* local sweep */ 1579 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1580 } 1581 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1582 if (flag & SOR_ZERO_INITIAL_GUESS) { 1583 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1584 its--; 1585 } 1586 while (its--) { 1587 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1589 1590 /* update rhs: bb1 = bb - B*x */ 1591 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1592 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1593 1594 /* local sweep */ 1595 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1596 } 1597 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1598 if (flag & SOR_ZERO_INITIAL_GUESS) { 1599 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr); 1600 its--; 1601 } 1602 while (its--) { 1603 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1604 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1605 1606 /* update rhs: bb1 = bb - B*x */ 1607 ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr); 1608 ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr); 1609 1610 /* local sweep */ 1611 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr); 1612 } 1613 } else if (flag & SOR_EISENSTAT) { 1614 Vec xx1; 1615 1616 ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr); 1617 ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr); 1618 1619 ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1620 ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1621 if (!mat->diag) { 1622 ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr); 1623 ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr); 1624 } 1625 ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr); 1626 if (hasop) { 1627 ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr); 1628 } else { 1629 ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr); 1630 } 1631 ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr); 1632 1633 ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr); 1634 1635 /* local sweep */ 1636 ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr); 1637 ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr); 1638 ierr = VecDestroy(&xx1);CHKERRQ(ierr); 1639 } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported"); 1640 1641 ierr = VecDestroy(&bb1);CHKERRQ(ierr); 1642 1643 matin->factorerrortype = mat->A->factorerrortype; 1644 PetscFunctionReturn(0); 1645 } 1646 1647 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B) 1648 { 1649 Mat aA,aB,Aperm; 1650 const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj; 1651 PetscScalar *aa,*ba; 1652 PetscInt i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest; 1653 PetscSF rowsf,sf; 1654 IS parcolp = NULL; 1655 PetscBool done; 1656 PetscErrorCode ierr; 
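  /* The permutation is applied in three stages: (1) invert the row and column permutations with PetscSF reductions so each process learns the destination row/column of everything it owns, (2) count diagonal and off-diagonal nonzeros per destination row to preallocate the permuted matrix, and (3) re-insert the values with MatSetValues() and assemble. */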
1657 1658 PetscFunctionBegin; 1659 ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr); 1660 ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr); 1661 ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr); 1662 ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr); 1663 1664 /* Invert row permutation to find out where my rows should go */ 1665 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr); 1666 ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr); 1667 ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr); 1668 for (i=0; i<m; i++) work[i] = A->rmap->rstart + i; 1669 ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1670 ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr); 1671 1672 /* Invert column permutation to find out where my columns should go */ 1673 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1674 ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr); 1675 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1676 for (i=0; i<n; i++) work[i] = A->cmap->rstart + i; 1677 ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1678 ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr); 1679 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1680 1681 ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr); 1682 ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr); 1683 ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr); 1684 1685 /* Find out where my gcols should go */ 1686 ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr); 1687 ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr); 1688 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 1689 ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr); 1690 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 1691 ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1692 ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr); 1693 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 1694 1695 ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr); 1696 ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1697 ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1698 for (i=0; i<m; i++) { 1699 PetscInt row = rdest[i],rowner; 1700 ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr); 1701 for (j=ai[i]; j<ai[i+1]; j++) { 1702 PetscInt cowner,col = cdest[aj[j]]; 1703 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */ 1704 if (rowner == cowner) dnnz[i]++; 1705 else onnz[i]++; 1706 } 1707 for (j=bi[i]; j<bi[i+1]; j++) { 1708 PetscInt cowner,col = gcdest[bj[j]]; 1709 ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); 1710 if (rowner == cowner) dnnz[i]++; 1711 else onnz[i]++; 1712 } 1713 } 1714 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1715 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr); 1716 ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1717 ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr); 1718 ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr); 1719 1720 ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr); 1721 ierr = 
MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr); 1722 ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr); 1723 for (i=0; i<m; i++) { 1724 PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */ 1725 PetscInt j0,rowlen; 1726 rowlen = ai[i+1] - ai[i]; 1727 for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1728 for ( ; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]]; 1729 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1730 } 1731 rowlen = bi[i+1] - bi[i]; 1732 for (j0=j=0; j<rowlen; j0=j) { 1733 for ( ; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]]; 1734 ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr); 1735 } 1736 } 1737 ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1738 ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1739 ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr); 1740 ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr); 1741 ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr); 1742 ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr); 1743 ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr); 1744 ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr); 1745 ierr = PetscFree(gcdest);CHKERRQ(ierr); 1746 if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);} 1747 *B = Aperm; 1748 PetscFunctionReturn(0); 1749 } 1750 1751 PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[]) 1752 { 1753 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1754 PetscErrorCode ierr; 1755 1756 PetscFunctionBegin; 1757 ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr); 1758 if (ghosts) *ghosts = aij->garray; 1759 PetscFunctionReturn(0); 1760 } 1761 1762 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info) 1763 { 1764 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1765 Mat A = mat->A,B = mat->B; 1766 PetscErrorCode ierr; 1767 PetscReal isend[5],irecv[5]; 1768 1769 PetscFunctionBegin; 1770 info->block_size = 1.0; 1771 ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr); 1772 1773 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 1774 isend[3] = info->memory; isend[4] = info->mallocs; 1775 1776 ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr); 1777 1778 isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded; 1779 isend[3] += info->memory; isend[4] += info->mallocs; 1780 if (flag == MAT_LOCAL) { 1781 info->nz_used = isend[0]; 1782 info->nz_allocated = isend[1]; 1783 info->nz_unneeded = isend[2]; 1784 info->memory = isend[3]; 1785 info->mallocs = isend[4]; 1786 } else if (flag == MAT_GLOBAL_MAX) { 1787 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1788 1789 info->nz_used = irecv[0]; 1790 info->nz_allocated = irecv[1]; 1791 info->nz_unneeded = irecv[2]; 1792 info->memory = irecv[3]; 1793 info->mallocs = irecv[4]; 1794 } else if (flag == MAT_GLOBAL_SUM) { 1795 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr); 1796 1797 info->nz_used = irecv[0]; 1798 info->nz_allocated = irecv[1]; 1799 info->nz_unneeded = irecv[2]; 1800 info->memory = irecv[3]; 1801 info->mallocs = irecv[4]; 1802 } 1803 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1804 info->fill_ratio_needed = 0; 1805 info->factor_mallocs = 0; 1806 
PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg) 1810 { 1811 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 1812 PetscErrorCode ierr; 1813 1814 PetscFunctionBegin; 1815 switch (op) { 1816 case MAT_NEW_NONZERO_LOCATIONS: 1817 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1818 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1819 case MAT_KEEP_NONZERO_PATTERN: 1820 case MAT_NEW_NONZERO_LOCATION_ERR: 1821 case MAT_USE_INODES: 1822 case MAT_IGNORE_ZERO_ENTRIES: 1823 MatCheckPreallocated(A,1); 1824 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1825 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1826 break; 1827 case MAT_ROW_ORIENTED: 1828 MatCheckPreallocated(A,1); 1829 a->roworiented = flg; 1830 1831 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 1832 ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr); 1833 break; 1834 case MAT_NEW_DIAGONALS: 1835 case MAT_SORTED_FULL: 1836 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1837 break; 1838 case MAT_IGNORE_OFF_PROC_ENTRIES: 1839 a->donotstash = flg; 1840 break; 1841 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1842 case MAT_SPD: 1843 case MAT_SYMMETRIC: 1844 case MAT_STRUCTURALLY_SYMMETRIC: 1845 case MAT_HERMITIAN: 1846 case MAT_SYMMETRY_ETERNAL: 1847 break; 1848 case MAT_SUBMAT_SINGLEIS: 1849 A->submat_singleis = flg; 1850 break; 1851 case MAT_STRUCTURE_ONLY: 1852 /* The option is handled directly by MatSetOption() */ 1853 break; 1854 default: 1855 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1856 } 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1861 { 1862 Mat_MPIAIJ *mat = (Mat_MPIAIJ*)matin->data; 1863 PetscScalar *vworkA,*vworkB,**pvA,**pvB,*v_p; 1864 PetscErrorCode ierr; 1865 PetscInt i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart; 1866 PetscInt nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend; 1867 PetscInt *cmap,*idx_p; 1868 1869 PetscFunctionBegin; 1870 if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1871 mat->getrowactive = PETSC_TRUE; 1872 1873 if (!mat->rowvalues && (idx || v)) { 1874 /* 1875 allocate enough space to hold information from the longest row. 
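       These work arrays (mat->rowvalues and mat->rowindices) are allocated once and reused by subsequent MatGetRow() calls on this matrix.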
1876 */ 1877 Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data; 1878 PetscInt max = 1,tmp; 1879 for (i=0; i<matin->rmap->n; i++) { 1880 tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; 1881 if (max < tmp) max = tmp; 1882 } 1883 ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr); 1884 } 1885 1886 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1887 lrow = row - rstart; 1888 1889 pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB; 1890 if (!v) {pvA = 0; pvB = 0;} 1891 if (!idx) {pcA = 0; if (!v) pcB = 0;} 1892 ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1893 ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1894 nztot = nzA + nzB; 1895 1896 cmap = mat->garray; 1897 if (v || idx) { 1898 if (nztot) { 1899 /* Sort by increasing column numbers, assuming A and B already sorted */ 1900 PetscInt imark = -1; 1901 if (v) { 1902 *v = v_p = mat->rowvalues; 1903 for (i=0; i<nzB; i++) { 1904 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1905 else break; 1906 } 1907 imark = i; 1908 for (i=0; i<nzA; i++) v_p[imark+i] = vworkA[i]; 1909 for (i=imark; i<nzB; i++) v_p[nzA+i] = vworkB[i]; 1910 } 1911 if (idx) { 1912 *idx = idx_p = mat->rowindices; 1913 if (imark > -1) { 1914 for (i=0; i<imark; i++) { 1915 idx_p[i] = cmap[cworkB[i]]; 1916 } 1917 } else { 1918 for (i=0; i<nzB; i++) { 1919 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1920 else break; 1921 } 1922 imark = i; 1923 } 1924 for (i=0; i<nzA; i++) idx_p[imark+i] = cstart + cworkA[i]; 1925 for (i=imark; i<nzB; i++) idx_p[nzA+i] = cmap[cworkB[i]]; 1926 } 1927 } else { 1928 if (idx) *idx = 0; 1929 if (v) *v = 0; 1930 } 1931 } 1932 *nz = nztot; 1933 ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr); 1934 ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr); 1935 PetscFunctionReturn(0); 1936 } 1937 1938 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1939 { 1940 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1941 1942 PetscFunctionBegin; 1943 if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first"); 1944 aij->getrowactive = PETSC_FALSE; 1945 PetscFunctionReturn(0); 1946 } 1947 1948 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm) 1949 { 1950 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 1951 Mat_SeqAIJ *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data; 1952 PetscErrorCode ierr; 1953 PetscInt i,j,cstart = mat->cmap->rstart; 1954 PetscReal sum = 0.0; 1955 MatScalar *v; 1956 1957 PetscFunctionBegin; 1958 if (aij->size == 1) { 1959 ierr = MatNorm(aij->A,type,norm);CHKERRQ(ierr); 1960 } else { 1961 if (type == NORM_FROBENIUS) { 1962 v = amat->a; 1963 for (i=0; i<amat->nz; i++) { 1964 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1965 } 1966 v = bmat->a; 1967 for (i=0; i<bmat->nz; i++) { 1968 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 1969 } 1970 ierr = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1971 *norm = PetscSqrtReal(*norm); 1972 ierr = PetscLogFlops(2*amat->nz+2*bmat->nz);CHKERRQ(ierr); 1973 } else if (type == NORM_1) { /* max column norm */ 1974 PetscReal *tmp,*tmp2; 1975 PetscInt *jj,*garray = aij->garray; 1976 ierr = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr); 1977 ierr = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr); 1978 *norm = 0.0; 
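      /* accumulate |a_ij| into a per-global-column sum in tmp[], add the sums across all ranks, then take the largest column sum */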
1979 v = amat->a; jj = amat->j; 1980 for (j=0; j<amat->nz; j++) { 1981 tmp[cstart + *jj++] += PetscAbsScalar(*v); v++; 1982 } 1983 v = bmat->a; jj = bmat->j; 1984 for (j=0; j<bmat->nz; j++) { 1985 tmp[garray[*jj++]] += PetscAbsScalar(*v); v++; 1986 } 1987 ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 1988 for (j=0; j<mat->cmap->N; j++) { 1989 if (tmp2[j] > *norm) *norm = tmp2[j]; 1990 } 1991 ierr = PetscFree(tmp);CHKERRQ(ierr); 1992 ierr = PetscFree(tmp2);CHKERRQ(ierr); 1993 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 1994 } else if (type == NORM_INFINITY) { /* max row norm */ 1995 PetscReal ntemp = 0.0; 1996 for (j=0; j<aij->A->rmap->n; j++) { 1997 v = amat->a + amat->i[j]; 1998 sum = 0.0; 1999 for (i=0; i<amat->i[j+1]-amat->i[j]; i++) { 2000 sum += PetscAbsScalar(*v); v++; 2001 } 2002 v = bmat->a + bmat->i[j]; 2003 for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) { 2004 sum += PetscAbsScalar(*v); v++; 2005 } 2006 if (sum > ntemp) ntemp = sum; 2007 } 2008 ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 2009 ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr); 2010 } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm"); 2011 } 2012 PetscFunctionReturn(0); 2013 } 2014 2015 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout) 2016 { 2017 Mat_MPIAIJ *a =(Mat_MPIAIJ*)A->data,*b; 2018 Mat_SeqAIJ *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag; 2019 PetscInt M = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol; 2020 const PetscInt *ai,*aj,*bi,*bj,*B_diag_i; 2021 PetscErrorCode ierr; 2022 Mat B,A_diag,*B_diag; 2023 const MatScalar *array; 2024 2025 PetscFunctionBegin; 2026 ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n; 2027 ai = Aloc->i; aj = Aloc->j; 2028 bi = Bloc->i; bj = Bloc->j; 2029 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 2030 PetscInt *d_nnz,*g_nnz,*o_nnz; 2031 PetscSFNode *oloc; 2032 PETSC_UNUSED PetscSF sf; 2033 2034 ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr); 2035 /* compute d_nnz for preallocation */ 2036 ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr); 2037 for (i=0; i<ai[ma]; i++) { 2038 d_nnz[aj[i]]++; 2039 } 2040 /* compute local off-diagonal contributions */ 2041 ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr); 2042 for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++; 2043 /* map those to global */ 2044 ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr); 2045 ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr); 2046 ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr); 2047 ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr); 2048 ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2049 ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr); 2050 ierr = PetscSFDestroy(&sf);CHKERRQ(ierr); 2051 2052 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 2053 ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); 2054 ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr); 2055 ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 2056 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 2057 ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr); 2058 } else { 2059 B = *matout; 2060 ierr = 
MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2061 } 2062 2063 b = (Mat_MPIAIJ*)B->data; 2064 A_diag = a->A; 2065 B_diag = &b->A; 2066 sub_B_diag = (Mat_SeqAIJ*)(*B_diag)->data; 2067 A_diag_ncol = A_diag->cmap->N; 2068 B_diag_ilen = sub_B_diag->ilen; 2069 B_diag_i = sub_B_diag->i; 2070 2071 /* Set ilen for diagonal of B */ 2072 for (i=0; i<A_diag_ncol; i++) { 2073 B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i]; 2074 } 2075 2076 /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done 2077 very quickly (=without using MatSetValues), because all writes are local. */ 2078 ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr); 2079 2080 /* copy over the B part */ 2081 ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr); 2082 array = Bloc->a; 2083 row = A->rmap->rstart; 2084 for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]]; 2085 cols_tmp = cols; 2086 for (i=0; i<mb; i++) { 2087 ncol = bi[i+1]-bi[i]; 2088 ierr = MatSetValues(B,ncol,cols_tmp,1,&row,array,INSERT_VALUES);CHKERRQ(ierr); 2089 row++; 2090 array += ncol; cols_tmp += ncol; 2091 } 2092 ierr = PetscFree(cols);CHKERRQ(ierr); 2093 2094 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2095 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2096 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 2097 *matout = B; 2098 } else { 2099 ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); 2100 } 2101 PetscFunctionReturn(0); 2102 } 2103 2104 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr) 2105 { 2106 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2107 Mat a = aij->A,b = aij->B; 2108 PetscErrorCode ierr; 2109 PetscInt s1,s2,s3; 2110 2111 PetscFunctionBegin; 2112 ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr); 2113 if (rr) { 2114 ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr); 2115 if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size"); 2116 /* Overlap communication with computation. 
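       The scatter of rr into aij->lvec is started here; the left scaling of the off-diagonal block and the scaling of the diagonal block below proceed while the messages are in flight, and the scatter is completed only when lvec is needed to right-scale the off-diagonal block.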
*/ 2117 ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 } 2119 if (ll) { 2120 ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr); 2121 if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size"); 2122 ierr = (*b->ops->diagonalscale)(b,ll,0);CHKERRQ(ierr); 2123 } 2124 /* scale the diagonal block */ 2125 ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr); 2126 2127 if (rr) { 2128 /* Do a scatter end and then right scale the off-diagonal block */ 2129 ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2130 ierr = (*b->ops->diagonalscale)(b,0,aij->lvec);CHKERRQ(ierr); 2131 } 2132 PetscFunctionReturn(0); 2133 } 2134 2135 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2136 { 2137 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2138 PetscErrorCode ierr; 2139 2140 PetscFunctionBegin; 2141 ierr = MatSetUnfactored(a->A);CHKERRQ(ierr); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool *flag) 2146 { 2147 Mat_MPIAIJ *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data; 2148 Mat a,b,c,d; 2149 PetscBool flg; 2150 PetscErrorCode ierr; 2151 2152 PetscFunctionBegin; 2153 a = matA->A; b = matA->B; 2154 c = matB->A; d = matB->B; 2155 2156 ierr = MatEqual(a,c,&flg);CHKERRQ(ierr); 2157 if (flg) { 2158 ierr = MatEqual(b,d,&flg);CHKERRQ(ierr); 2159 } 2160 ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 2161 PetscFunctionReturn(0); 2162 } 2163 2164 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str) 2165 { 2166 PetscErrorCode ierr; 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2168 Mat_MPIAIJ *b = (Mat_MPIAIJ*)B->data; 2169 2170 PetscFunctionBegin; 2171 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2172 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2173 /* because of the column compression in the off-processor part of the matrix a->B, 2174 the number of columns in a->B and b->B may be different, hence we cannot call 2175 the MatCopy() directly on the two parts. If need be, we can provide a more 2176 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2177 then copying the submatrices */ 2178 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2179 } else { 2180 ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr); 2181 ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr); 2182 } 2183 ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr); 2184 PetscFunctionReturn(0); 2185 } 2186 2187 PetscErrorCode MatSetUp_MPIAIJ(Mat A) 2188 { 2189 PetscErrorCode ierr; 2190 2191 PetscFunctionBegin; 2192 ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr); 2193 PetscFunctionReturn(0); 2194 } 2195 2196 /* 2197 Computes the number of nonzeros per row needed for preallocation when X and Y 2198 have different nonzero structure. 
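     For example, if row i of X has global columns {1,4} and row i of Y has global columns {2,4,7}, the merged row is {1,2,4,7} and nnz[i] = 4 (both column lists are assumed to be sorted).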
2199 */ 2200 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz) 2201 { 2202 PetscInt i,j,k,nzx,nzy; 2203 2204 PetscFunctionBegin; 2205 /* Set the number of nonzeros in the new matrix */ 2206 for (i=0; i<m; i++) { 2207 const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i]; 2208 nzx = xi[i+1] - xi[i]; 2209 nzy = yi[i+1] - yi[i]; 2210 nnz[i] = 0; 2211 for (j=0,k=0; j<nzx; j++) { /* Point in X */ 2212 for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2213 if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++; /* Skip duplicate */ 2214 nnz[i]++; 2215 } 2216 for (; k<nzy; k++) nnz[i]++; 2217 } 2218 PetscFunctionReturn(0); 2219 } 2220 2221 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2222 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz) 2223 { 2224 PetscErrorCode ierr; 2225 PetscInt m = Y->rmap->N; 2226 Mat_SeqAIJ *x = (Mat_SeqAIJ*)X->data; 2227 Mat_SeqAIJ *y = (Mat_SeqAIJ*)Y->data; 2228 2229 PetscFunctionBegin; 2230 ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr); 2231 PetscFunctionReturn(0); 2232 } 2233 2234 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2235 { 2236 PetscErrorCode ierr; 2237 Mat_MPIAIJ *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data; 2238 PetscBLASInt bnz,one=1; 2239 Mat_SeqAIJ *x,*y; 2240 2241 PetscFunctionBegin; 2242 if (str == SAME_NONZERO_PATTERN) { 2243 PetscScalar alpha = a; 2244 x = (Mat_SeqAIJ*)xx->A->data; 2245 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2246 y = (Mat_SeqAIJ*)yy->A->data; 2247 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2248 x = (Mat_SeqAIJ*)xx->B->data; 2249 y = (Mat_SeqAIJ*)yy->B->data; 2250 ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr); 2251 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2252 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2253 /* the MatAXPY_Basic* subroutines calls MatAssembly, so the matrix on the GPU 2254 will be updated */ 2255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 2256 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 2257 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 2258 } 2259 #endif 2260 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2261 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2262 } else { 2263 Mat B; 2264 PetscInt *nnz_d,*nnz_o; 2265 ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr); 2266 ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr); 2267 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2268 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2269 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2270 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2271 ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr); 2272 ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr); 2273 ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr); 2274 ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr); 2275 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2276 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2277 ierr = 
PetscFree(nnz_d);CHKERRQ(ierr); 2278 ierr = PetscFree(nnz_o);CHKERRQ(ierr); 2279 } 2280 PetscFunctionReturn(0); 2281 } 2282 2283 extern PetscErrorCode MatConjugate_SeqAIJ(Mat); 2284 2285 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2286 { 2287 #if defined(PETSC_USE_COMPLEX) 2288 PetscErrorCode ierr; 2289 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2290 2291 PetscFunctionBegin; 2292 ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr); 2293 ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr); 2294 #else 2295 PetscFunctionBegin; 2296 #endif 2297 PetscFunctionReturn(0); 2298 } 2299 2300 PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2301 { 2302 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2303 PetscErrorCode ierr; 2304 2305 PetscFunctionBegin; 2306 ierr = MatRealPart(a->A);CHKERRQ(ierr); 2307 ierr = MatRealPart(a->B);CHKERRQ(ierr); 2308 PetscFunctionReturn(0); 2309 } 2310 2311 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2312 { 2313 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 ierr = MatImaginaryPart(a->A);CHKERRQ(ierr); 2318 ierr = MatImaginaryPart(a->B);CHKERRQ(ierr); 2319 PetscFunctionReturn(0); 2320 } 2321 2322 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2323 { 2324 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2325 PetscErrorCode ierr; 2326 PetscInt i,*idxb = 0; 2327 PetscScalar *va,*vb; 2328 Vec vtmp; 2329 2330 PetscFunctionBegin; 2331 ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr); 2332 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2333 if (idx) { 2334 for (i=0; i<A->rmap->n; i++) { 2335 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2336 } 2337 } 2338 2339 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2340 if (idx) { 2341 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2342 } 2343 ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2344 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2345 2346 for (i=0; i<A->rmap->n; i++) { 2347 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2348 va[i] = vb[i]; 2349 if (idx) idx[i] = a->garray[idxb[i]]; 2350 } 2351 } 2352 2353 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2354 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2355 ierr = PetscFree(idxb);CHKERRQ(ierr); 2356 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2357 PetscFunctionReturn(0); 2358 } 2359 2360 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2361 { 2362 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2363 PetscErrorCode ierr; 2364 PetscInt i,*idxb = 0; 2365 PetscScalar *va,*vb; 2366 Vec vtmp; 2367 2368 PetscFunctionBegin; 2369 ierr = MatGetRowMinAbs(a->A,v,idx);CHKERRQ(ierr); 2370 ierr = VecGetArray(v,&va);CHKERRQ(ierr); 2371 if (idx) { 2372 for (i=0; i<A->rmap->n; i++) { 2373 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2374 } 2375 } 2376 2377 ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr); 2378 if (idx) { 2379 ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr); 2380 } 2381 ierr = MatGetRowMinAbs(a->B,vtmp,idxb);CHKERRQ(ierr); 2382 ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr); 2383 2384 for (i=0; i<A->rmap->n; i++) { 2385 if (PetscAbsScalar(va[i]) > PetscAbsScalar(vb[i])) { 2386 va[i] = vb[i]; 2387 if (idx) idx[i] = a->garray[idxb[i]]; 2388 } 2389 } 2390 2391 ierr = VecRestoreArray(v,&va);CHKERRQ(ierr); 2392 ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr); 2393 ierr = PetscFree(idxb);CHKERRQ(ierr); 2394 ierr = VecDestroy(&vtmp);CHKERRQ(ierr); 2395 PetscFunctionReturn(0); 2396 } 2397 2398 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2399 { 2400
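  /* Take the row minima of the diagonal and off-diagonal blocks separately, then keep, per row, the entry with the smaller magnitude; garray maps the off-diagonal block's compressed column indices back to global column numbers. */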
Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2401 PetscInt n = A->rmap->n; 2402 PetscInt cstart = A->cmap->rstart; 2403 PetscInt *cmap = mat->garray; 2404 PetscInt *diagIdx, *offdiagIdx; 2405 Vec diagV, offdiagV; 2406 PetscScalar *a, *diagA, *offdiagA; 2407 PetscInt r; 2408 PetscErrorCode ierr; 2409 2410 PetscFunctionBegin; 2411 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2412 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2413 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2414 ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2415 ierr = MatGetRowMin(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2416 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2417 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2418 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2419 for (r = 0; r < n; ++r) { 2420 if (PetscAbsScalar(diagA[r]) <= PetscAbsScalar(offdiagA[r])) { 2421 a[r] = diagA[r]; 2422 idx[r] = cstart + diagIdx[r]; 2423 } else { 2424 a[r] = offdiagA[r]; 2425 idx[r] = cmap[offdiagIdx[r]]; 2426 } 2427 } 2428 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2429 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2430 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2431 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2432 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2433 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2434 PetscFunctionReturn(0); 2435 } 2436 2437 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2438 { 2439 Mat_MPIAIJ *mat = (Mat_MPIAIJ*) A->data; 2440 PetscInt n = A->rmap->n; 2441 PetscInt cstart = A->cmap->rstart; 2442 PetscInt *cmap = mat->garray; 2443 PetscInt *diagIdx, *offdiagIdx; 2444 Vec diagV, offdiagV; 2445 PetscScalar *a, *diagA, *offdiagA; 2446 PetscInt r; 2447 PetscErrorCode ierr; 2448 2449 PetscFunctionBegin; 2450 ierr = PetscMalloc2(n,&diagIdx,n,&offdiagIdx);CHKERRQ(ierr); 2451 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &diagV);CHKERRQ(ierr); 2452 ierr = VecCreateSeq(PETSC_COMM_SELF, n, &offdiagV);CHKERRQ(ierr); 2453 ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr); 2454 ierr = MatGetRowMax(mat->B, offdiagV, offdiagIdx);CHKERRQ(ierr); 2455 ierr = VecGetArray(v, &a);CHKERRQ(ierr); 2456 ierr = VecGetArray(diagV, &diagA);CHKERRQ(ierr); 2457 ierr = VecGetArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2458 for (r = 0; r < n; ++r) { 2459 if (PetscAbsScalar(diagA[r]) >= PetscAbsScalar(offdiagA[r])) { 2460 a[r] = diagA[r]; 2461 idx[r] = cstart + diagIdx[r]; 2462 } else { 2463 a[r] = offdiagA[r]; 2464 idx[r] = cmap[offdiagIdx[r]]; 2465 } 2466 } 2467 ierr = VecRestoreArray(v, &a);CHKERRQ(ierr); 2468 ierr = VecRestoreArray(diagV, &diagA);CHKERRQ(ierr); 2469 ierr = VecRestoreArray(offdiagV, &offdiagA);CHKERRQ(ierr); 2470 ierr = VecDestroy(&diagV);CHKERRQ(ierr); 2471 ierr = VecDestroy(&offdiagV);CHKERRQ(ierr); 2472 ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr); 2473 PetscFunctionReturn(0); 2474 } 2475 2476 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat) 2477 { 2478 PetscErrorCode ierr; 2479 Mat *dummy; 2480 2481 PetscFunctionBegin; 2482 ierr = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr); 2483 *newmat = *dummy; 2484 ierr = PetscFree(dummy);CHKERRQ(ierr); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values) 2489 { 2490 Mat_MPIAIJ *a = (Mat_MPIAIJ*) A->data; 2491 PetscErrorCode ierr; 2492 2493 PetscFunctionBegin; 2494 ierr =
MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr); 2495 A->factorerrortype = a->A->factorerrortype; 2496 PetscFunctionReturn(0); 2497 } 2498 2499 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx) 2500 { 2501 PetscErrorCode ierr; 2502 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)x->data; 2503 2504 PetscFunctionBegin; 2505 if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2506 ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr); 2507 if (x->assembled) { 2508 ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr); 2509 } else { 2510 ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr); 2511 } 2512 ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2513 ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc) 2518 { 2519 PetscFunctionBegin; 2520 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2521 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2522 PetscFunctionReturn(0); 2523 } 2524 2525 /*@ 2526 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2527 2528 Collective on Mat 2529 2530 Input Parameters: 2531 + A - the matrix 2532 - sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm) 2533 2534 Level: advanced 2535 2536 @*/ 2537 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc) 2538 { 2539 PetscErrorCode ierr; 2540 2541 PetscFunctionBegin; 2542 ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A) 2547 { 2548 PetscErrorCode ierr; 2549 PetscBool sc = PETSC_FALSE,flg; 2550 2551 PetscFunctionBegin; 2552 ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr); 2553 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2554 ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr); 2555 if (flg) { 2556 ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr); 2557 } 2558 ierr = PetscOptionsTail();CHKERRQ(ierr); 2559 PetscFunctionReturn(0); 2560 } 2561 2562 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a) 2563 { 2564 PetscErrorCode ierr; 2565 Mat_MPIAIJ *maij = (Mat_MPIAIJ*)Y->data; 2566 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)maij->A->data; 2567 2568 PetscFunctionBegin; 2569 if (!Y->preallocated) { 2570 ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr); 2571 } else if (!aij->nz) { 2572 PetscInt nonew = aij->nonew; 2573 ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr); 2574 aij->nonew = nonew; 2575 } 2576 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2577 PetscFunctionReturn(0); 2578 } 2579 2580 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool *missing,PetscInt *d) 2581 { 2582 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2583 PetscErrorCode ierr; 2584 2585 PetscFunctionBegin; 2586 if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices"); 2587 ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr); 2588 if (d) { 2589 PetscInt rstart; 
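    /* the diagonal block reports the location in local row numbering; shift it by the ownership start to make it global */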
2590 ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr); 2591 *d += rstart; 2592 2593 } 2594 PetscFunctionReturn(0); 2595 } 2596 2597 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag) 2598 { 2599 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 2600 PetscErrorCode ierr; 2601 2602 PetscFunctionBegin; 2603 ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr); 2604 PetscFunctionReturn(0); 2605 } 2606 2607 /* -------------------------------------------------------------------*/ 2608 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2609 MatGetRow_MPIAIJ, 2610 MatRestoreRow_MPIAIJ, 2611 MatMult_MPIAIJ, 2612 /* 4*/ MatMultAdd_MPIAIJ, 2613 MatMultTranspose_MPIAIJ, 2614 MatMultTransposeAdd_MPIAIJ, 2615 0, 2616 0, 2617 0, 2618 /*10*/ 0, 2619 0, 2620 0, 2621 MatSOR_MPIAIJ, 2622 MatTranspose_MPIAIJ, 2623 /*15*/ MatGetInfo_MPIAIJ, 2624 MatEqual_MPIAIJ, 2625 MatGetDiagonal_MPIAIJ, 2626 MatDiagonalScale_MPIAIJ, 2627 MatNorm_MPIAIJ, 2628 /*20*/ MatAssemblyBegin_MPIAIJ, 2629 MatAssemblyEnd_MPIAIJ, 2630 MatSetOption_MPIAIJ, 2631 MatZeroEntries_MPIAIJ, 2632 /*24*/ MatZeroRows_MPIAIJ, 2633 0, 2634 0, 2635 0, 2636 0, 2637 /*29*/ MatSetUp_MPIAIJ, 2638 0, 2639 0, 2640 MatGetDiagonalBlock_MPIAIJ, 2641 0, 2642 /*34*/ MatDuplicate_MPIAIJ, 2643 0, 2644 0, 2645 0, 2646 0, 2647 /*39*/ MatAXPY_MPIAIJ, 2648 MatCreateSubMatrices_MPIAIJ, 2649 MatIncreaseOverlap_MPIAIJ, 2650 MatGetValues_MPIAIJ, 2651 MatCopy_MPIAIJ, 2652 /*44*/ MatGetRowMax_MPIAIJ, 2653 MatScale_MPIAIJ, 2654 MatShift_MPIAIJ, 2655 MatDiagonalSet_MPIAIJ, 2656 MatZeroRowsColumns_MPIAIJ, 2657 /*49*/ MatSetRandom_MPIAIJ, 2658 0, 2659 0, 2660 0, 2661 0, 2662 /*54*/ MatFDColoringCreate_MPIXAIJ, 2663 0, 2664 MatSetUnfactored_MPIAIJ, 2665 MatPermute_MPIAIJ, 2666 0, 2667 /*59*/ MatCreateSubMatrix_MPIAIJ, 2668 MatDestroy_MPIAIJ, 2669 MatView_MPIAIJ, 2670 0, 2671 MatMatMatMult_MPIAIJ_MPIAIJ_MPIAIJ, 2672 /*64*/ MatMatMatMultSymbolic_MPIAIJ_MPIAIJ_MPIAIJ, 2673 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2674 0, 2675 0, 2676 0, 2677 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2678 MatGetRowMinAbs_MPIAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /*75*/ MatFDColoringApply_AIJ, 2684 MatSetFromOptions_MPIAIJ, 2685 0, 2686 0, 2687 MatFindZeroDiagonals_MPIAIJ, 2688 /*80*/ 0, 2689 0, 2690 0, 2691 /*83*/ MatLoad_MPIAIJ, 2692 MatIsSymmetric_MPIAIJ, 2693 0, 2694 0, 2695 0, 2696 0, 2697 /*89*/ MatMatMult_MPIAIJ_MPIAIJ, 2698 MatMatMultSymbolic_MPIAIJ_MPIAIJ, 2699 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2700 MatPtAP_MPIAIJ_MPIAIJ, 2701 MatPtAPSymbolic_MPIAIJ_MPIAIJ, 2702 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2703 0, 2704 0, 2705 0, 2706 0, 2707 /*99*/ 0, 2708 0, 2709 0, 2710 MatConjugate_MPIAIJ, 2711 0, 2712 /*104*/MatSetValuesRow_MPIAIJ, 2713 MatRealPart_MPIAIJ, 2714 MatImaginaryPart_MPIAIJ, 2715 0, 2716 0, 2717 /*109*/0, 2718 0, 2719 MatGetRowMin_MPIAIJ, 2720 0, 2721 MatMissingDiagonal_MPIAIJ, 2722 /*114*/MatGetSeqNonzeroStructure_MPIAIJ, 2723 0, 2724 MatGetGhosts_MPIAIJ, 2725 0, 2726 0, 2727 /*119*/0, 2728 0, 2729 0, 2730 0, 2731 MatGetMultiProcBlock_MPIAIJ, 2732 /*124*/MatFindNonzeroRows_MPIAIJ, 2733 MatGetColumnNorms_MPIAIJ, 2734 MatInvertBlockDiagonal_MPIAIJ, 2735 MatInvertVariableBlockDiagonal_MPIAIJ, 2736 MatCreateSubMatricesMPI_MPIAIJ, 2737 /*129*/0, 2738 MatTransposeMatMult_MPIAIJ_MPIAIJ, 2739 MatTransposeMatMultSymbolic_MPIAIJ_MPIAIJ, 2740 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2741 0, 2742 /*134*/0, 2743 0, 2744 MatRARt_MPIAIJ_MPIAIJ, 2745 0, 2746 0, 2747 /*139*/MatSetBlockSizes_MPIAIJ, 2748 0, 
2749 0, 2750 MatFDColoringSetUp_MPIXAIJ, 2751 MatFindOffBlockDiagonalEntries_MPIAIJ, 2752 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIAIJ 2753 }; 2754 2755 /* ----------------------------------------------------------------------------------------*/ 2756 2757 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2758 { 2759 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2760 PetscErrorCode ierr; 2761 2762 PetscFunctionBegin; 2763 ierr = MatStoreValues(aij->A);CHKERRQ(ierr); 2764 ierr = MatStoreValues(aij->B);CHKERRQ(ierr); 2765 PetscFunctionReturn(0); 2766 } 2767 2768 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2769 { 2770 Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data; 2771 PetscErrorCode ierr; 2772 2773 PetscFunctionBegin; 2774 ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr); 2775 ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr); 2776 PetscFunctionReturn(0); 2777 } 2778 2779 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[]) 2780 { 2781 Mat_MPIAIJ *b; 2782 PetscErrorCode ierr; 2783 PetscMPIInt size; 2784 2785 PetscFunctionBegin; 2786 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2787 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2788 b = (Mat_MPIAIJ*)B->data; 2789 2790 #if defined(PETSC_USE_CTABLE) 2791 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2792 #else 2793 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2794 #endif 2795 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2796 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2797 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2798 2799 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2800 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 2801 ierr = MatDestroy(&b->B);CHKERRQ(ierr); 2802 ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr); 2803 ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? 
B->cmap->N : 0);CHKERRQ(ierr); 2804 ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr); 2805 ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr); 2806 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr); 2807 2808 if (!B->preallocated) { 2809 ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr); 2810 ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr); 2811 ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr); 2812 ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr); 2813 ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr); 2814 } 2815 2816 ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr); 2817 ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr); 2818 B->preallocated = PETSC_TRUE; 2819 B->was_assembled = PETSC_FALSE; 2820 B->assembled = PETSC_FALSE; 2821 PetscFunctionReturn(0); 2822 } 2823 2824 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2825 { 2826 Mat_MPIAIJ *b; 2827 PetscErrorCode ierr; 2828 2829 PetscFunctionBegin; 2830 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2831 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2832 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2833 b = (Mat_MPIAIJ*)B->data; 2834 2835 #if defined(PETSC_USE_CTABLE) 2836 ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr); 2837 #else 2838 ierr = PetscFree(b->colmap);CHKERRQ(ierr); 2839 #endif 2840 ierr = PetscFree(b->garray);CHKERRQ(ierr); 2841 ierr = VecDestroy(&b->lvec);CHKERRQ(ierr); 2842 ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr); 2843 2844 ierr = MatResetPreallocation(b->A);CHKERRQ(ierr); 2845 ierr = MatResetPreallocation(b->B);CHKERRQ(ierr); 2846 B->preallocated = PETSC_TRUE; 2847 B->was_assembled = PETSC_FALSE; 2848 B->assembled = PETSC_FALSE; 2849 PetscFunctionReturn(0); 2850 } 2851 2852 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat) 2853 { 2854 Mat mat; 2855 Mat_MPIAIJ *a,*oldmat = (Mat_MPIAIJ*)matin->data; 2856 PetscErrorCode ierr; 2857 2858 PetscFunctionBegin; 2859 *newmat = 0; 2860 ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr); 2861 ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr); 2862 ierr = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr); 2863 ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr); 2864 a = (Mat_MPIAIJ*)mat->data; 2865 2866 mat->factortype = matin->factortype; 2867 mat->assembled = PETSC_TRUE; 2868 mat->insertmode = NOT_SET_VALUES; 2869 mat->preallocated = PETSC_TRUE; 2870 2871 a->size = oldmat->size; 2872 a->rank = oldmat->rank; 2873 a->donotstash = oldmat->donotstash; 2874 a->roworiented = oldmat->roworiented; 2875 a->rowindices = 0; 2876 a->rowvalues = 0; 2877 a->getrowactive = PETSC_FALSE; 2878 2879 ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr); 2880 ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr); 2881 2882 if (oldmat->colmap) { 2883 #if defined(PETSC_USE_CTABLE) 2884 ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr); 2885 #else 2886 ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr); 2887 ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr); 2888 ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr); 2889 #endif 2890 } else a->colmap = 0; 2891 if (oldmat->garray) { 2892 PetscInt len; 2893 len = oldmat->B->cmap->n; 2894 ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr); 2895 ierr = 
PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr); 2896 if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); } 2897 } else a->garray = 0; 2898 2899 ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr); 2900 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr); 2901 ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr); 2902 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr); 2903 2904 if (oldmat->Mvctx_mpi1) { 2905 ierr = VecScatterCopy(oldmat->Mvctx_mpi1,&a->Mvctx_mpi1);CHKERRQ(ierr); 2906 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx_mpi1);CHKERRQ(ierr); 2907 } 2908 2909 ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr); 2910 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr); 2911 ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr); 2912 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr); 2913 ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr); 2914 *newmat = mat; 2915 PetscFunctionReturn(0); 2916 } 2917 2918 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 2919 { 2920 PetscBool isbinary, ishdf5; 2921 PetscErrorCode ierr; 2922 2923 PetscFunctionBegin; 2924 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2925 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2926 /* force binary viewer to load .info file if it has not yet done so */ 2927 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 2928 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 2929 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr); 2930 if (isbinary) { 2931 ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr); 2932 } else if (ishdf5) { 2933 #if defined(PETSC_HAVE_HDF5) 2934 ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr); 2935 #else 2936 SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 2937 #endif 2938 } else { 2939 SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2940 } 2941 PetscFunctionReturn(0); 2942 } 2943 2944 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat newMat, PetscViewer viewer) 2945 { 2946 PetscScalar *vals,*svals; 2947 MPI_Comm comm; 2948 PetscErrorCode ierr; 2949 PetscMPIInt rank,size,tag = ((PetscObject)viewer)->tag; 2950 PetscInt i,nz,j,rstart,rend,mmax,maxnz = 0; 2951 PetscInt header[4],*rowlengths = 0,M,N,m,*cols; 2952 PetscInt *ourlens = NULL,*procsnz = NULL,*offlens = NULL,jj,*mycols,*smycols; 2953 PetscInt cend,cstart,n,*rowners; 2954 int fd; 2955 PetscInt bs = newMat->rmap->bs; 2956 2957 PetscFunctionBegin; 2958 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 2959 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2960 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2961 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 2962 if (!rank) { 2963 ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr); 2964 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object"); 2965 if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk,cannot load as MATMPIAIJ"); 
2966 } 2967 2968 ierr = PetscOptionsBegin(comm,NULL,"Options for loading MATMPIAIJ matrix","Mat");CHKERRQ(ierr); 2969 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 2970 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2971 if (bs < 0) bs = 1; 2972 2973 ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr); 2974 M = header[1]; N = header[2]; 2975 2976 /* If global sizes are set, check if they are consistent with that given in the file */ 2977 if (newMat->rmap->N >= 0 && newMat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newMat->rmap->N,M); 2978 if (newMat->cmap->N >=0 && newMat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newMat->cmap->N,N); 2979 2980 /* determine ownership of all (block) rows */ 2981 if (M%bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows (%d) and block size (%d)",M,bs); 2982 if (newMat->rmap->n < 0) m = bs*((M/bs)/size + (((M/bs) % size) > rank)); /* PETSC_DECIDE */ 2983 else m = newMat->rmap->n; /* Set by user */ 2984 2985 ierr = PetscMalloc1(size+1,&rowners);CHKERRQ(ierr); 2986 ierr = MPI_Allgather(&m,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr); 2987 2988 /* First process needs enough room for process with most rows */ 2989 if (!rank) { 2990 mmax = rowners[1]; 2991 for (i=2; i<=size; i++) { 2992 mmax = PetscMax(mmax, rowners[i]); 2993 } 2994 } else mmax = -1; /* unused, but compilers complain */ 2995 2996 rowners[0] = 0; 2997 for (i=2; i<=size; i++) { 2998 rowners[i] += rowners[i-1]; 2999 } 3000 rstart = rowners[rank]; 3001 rend = rowners[rank+1]; 3002 3003 /* distribute row lengths to all processors */ 3004 ierr = PetscMalloc2(m,&ourlens,m,&offlens);CHKERRQ(ierr); 3005 if (!rank) { 3006 ierr = PetscBinaryRead(fd,ourlens,m,NULL,PETSC_INT);CHKERRQ(ierr); 3007 ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr); 3008 ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr); 3009 for (j=0; j<m; j++) { 3010 procsnz[0] += ourlens[j]; 3011 } 3012 for (i=1; i<size; i++) { 3013 ierr = PetscBinaryRead(fd,rowlengths,rowners[i+1]-rowners[i],NULL,PETSC_INT);CHKERRQ(ierr); 3014 /* calculate the number of nonzeros on each processor */ 3015 for (j=0; j<rowners[i+1]-rowners[i]; j++) { 3016 procsnz[i] += rowlengths[j]; 3017 } 3018 ierr = MPIULong_Send(rowlengths,rowners[i+1]-rowners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3019 } 3020 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3021 } else { 3022 ierr = MPIULong_Recv(ourlens,m,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3023 } 3024 3025 if (!rank) { 3026 /* determine max buffer needed and allocate it */ 3027 maxnz = 0; 3028 for (i=0; i<size; i++) { 3029 maxnz = PetscMax(maxnz,procsnz[i]); 3030 } 3031 ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr); 3032 3033 /* read in my part of the matrix column indices */ 3034 nz = procsnz[0]; 3035 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3036 ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3037 3038 /* read in every one elses and ship off */ 3039 for (i=1; i<size; i++) { 3040 nz = procsnz[i]; 3041 ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr); 3042 ierr = MPIULong_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr); 3043 } 3044 ierr = PetscFree(cols);CHKERRQ(ierr); 3045 } else { 3046 /* determine buffer space needed for message */ 3047 nz = 0; 3048 for (i=0; i<m; i++) { 3049 
nz += ourlens[i]; 3050 } 3051 ierr = PetscMalloc1(nz,&mycols);CHKERRQ(ierr); 3052 3053 /* receive message of column indices*/ 3054 ierr = MPIULong_Recv(mycols,nz,MPIU_INT,0,tag,comm);CHKERRQ(ierr); 3055 } 3056 3057 /* determine column ownership if matrix is not square */ 3058 if (N != M) { 3059 if (newMat->cmap->n < 0) n = N/size + ((N % size) > rank); 3060 else n = newMat->cmap->n; 3061 ierr = MPI_Scan(&n,&cend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3062 cstart = cend - n; 3063 } else { 3064 cstart = rstart; 3065 cend = rend; 3066 n = cend - cstart; 3067 } 3068 3069 /* loop over local rows, determining number of off diagonal entries */ 3070 ierr = PetscArrayzero(offlens,m);CHKERRQ(ierr); 3071 jj = 0; 3072 for (i=0; i<m; i++) { 3073 for (j=0; j<ourlens[i]; j++) { 3074 if (mycols[jj] < cstart || mycols[jj] >= cend) offlens[i]++; 3075 jj++; 3076 } 3077 } 3078 3079 for (i=0; i<m; i++) { 3080 ourlens[i] -= offlens[i]; 3081 } 3082 ierr = MatSetSizes(newMat,m,n,M,N);CHKERRQ(ierr); 3083 3084 if (bs > 1) {ierr = MatSetBlockSize(newMat,bs);CHKERRQ(ierr);} 3085 3086 ierr = MatMPIAIJSetPreallocation(newMat,0,ourlens,0,offlens);CHKERRQ(ierr); 3087 3088 for (i=0; i<m; i++) { 3089 ourlens[i] += offlens[i]; 3090 } 3091 3092 if (!rank) { 3093 ierr = PetscMalloc1(maxnz+1,&vals);CHKERRQ(ierr); 3094 3095 /* read in my part of the matrix numerical values */ 3096 nz = procsnz[0]; 3097 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3098 3099 /* insert into matrix */ 3100 jj = rstart; 3101 smycols = mycols; 3102 svals = vals; 3103 for (i=0; i<m; i++) { 3104 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3105 smycols += ourlens[i]; 3106 svals += ourlens[i]; 3107 jj++; 3108 } 3109 3110 /* read in other processors and ship out */ 3111 for (i=1; i<size; i++) { 3112 nz = procsnz[i]; 3113 ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr); 3114 ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3115 } 3116 ierr = PetscFree(procsnz);CHKERRQ(ierr); 3117 } else { 3118 /* receive numeric values */ 3119 ierr = PetscMalloc1(nz+1,&vals);CHKERRQ(ierr); 3120 3121 /* receive message of values*/ 3122 ierr = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newMat)->tag,comm);CHKERRQ(ierr); 3123 3124 /* insert into matrix */ 3125 jj = rstart; 3126 smycols = mycols; 3127 svals = vals; 3128 for (i=0; i<m; i++) { 3129 ierr = MatSetValues_MPIAIJ(newMat,1,&jj,ourlens[i],smycols,svals,INSERT_VALUES);CHKERRQ(ierr); 3130 smycols += ourlens[i]; 3131 svals += ourlens[i]; 3132 jj++; 3133 } 3134 } 3135 ierr = PetscFree2(ourlens,offlens);CHKERRQ(ierr); 3136 ierr = PetscFree(vals);CHKERRQ(ierr); 3137 ierr = PetscFree(mycols);CHKERRQ(ierr); 3138 ierr = PetscFree(rowners);CHKERRQ(ierr); 3139 ierr = MatAssemblyBegin(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3140 ierr = MatAssemblyEnd(newMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3141 PetscFunctionReturn(0); 3142 } 3143 3144 /* Not scalable because of ISAllGather() unless getting all columns. 
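   ISAllGather() leaves a copy of the complete column index set on every process, so the memory per process grows with the global size of iscol.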
*/ 3145 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq) 3146 { 3147 PetscErrorCode ierr; 3148 IS iscol_local; 3149 PetscBool isstride; 3150 PetscMPIInt lisstride=0,gisstride; 3151 3152 PetscFunctionBegin; 3153 /* check if we are grabbing all columns*/ 3154 ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr); 3155 3156 if (isstride) { 3157 PetscInt start,len,mstart,mlen; 3158 ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr); 3159 ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr); 3160 ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr); 3161 if (mstart == start && mlen-mstart == len) lisstride = 1; 3162 } 3163 3164 ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 3165 if (gisstride) { 3166 PetscInt N; 3167 ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr); 3168 ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),N,0,1,&iscol_local);CHKERRQ(ierr); 3169 ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr); 3170 ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr); 3171 } else { 3172 PetscInt cbs; 3173 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3174 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 3175 ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr); 3176 } 3177 3178 *isseq = iscol_local; 3179 PetscFunctionReturn(0); 3180 } 3181 3182 /* 3183 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3184 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3185 3186 Input Parameters: 3187 mat - matrix 3188 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3189 i.e., mat->rstart <= isrow[i] < mat->rend 3190 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3191 i.e., mat->cstart <= iscol[i] < mat->cend 3192 Output Parameter: 3193 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3194 iscol_o - sequential column index set for retrieving mat->B 3195 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3196 */ 3197 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[]) 3198 { 3199 PetscErrorCode ierr; 3200 Vec x,cmap; 3201 const PetscInt *is_idx; 3202 PetscScalar *xarray,*cmaparray; 3203 PetscInt ncols,isstart,*idx,m,rstart,*cmap1,count; 3204 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3205 Mat B=a->B; 3206 Vec lvec=a->lvec,lcmap; 3207 PetscInt i,cstart,cend,Bn=B->cmap->N; 3208 MPI_Comm comm; 3209 VecScatter Mvctx=a->Mvctx; 3210 3211 PetscFunctionBegin; 3212 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3213 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 3214 3215 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3216 ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr); 3217 ierr = VecSet(x,-1.0);CHKERRQ(ierr); 3218 ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr); 3219 ierr = VecSet(cmap,-1.0);CHKERRQ(ierr); 3220 3221 /* Get start indices */ 3222 ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3223 isstart -= ncols; 3224 ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr); 3225 3226 ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr); 3227 ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 3228 ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr); 3229 ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr); 3230 for (i=0; i<ncols; i++) { 3231 xarray[is_idx[i]-cstart] = (PetscScalar)is_idx[i]; 3232 cmaparray[is_idx[i]-cstart] = i + isstart; /* global index of iscol[i] */ 3233 idx[i] = is_idx[i]-cstart; /* local index of iscol[i] */ 3234 } 3235 ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 3236 ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr); 3237 ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr); 3238 3239 /* Get iscol_d */ 3240 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr); 3241 ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr); 3242 ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr); 3243 3244 /* Get isrow_d */ 3245 ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr); 3246 rstart = mat->rmap->rstart; 3247 ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr); 3248 ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr); 3249 for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart; 3250 ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr); 3251 3252 ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr); 3253 ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr); 3254 ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr); 3255 3256 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3257 ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3258 ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3259 3260 ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr); 3261 3262 ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3263 ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3264 3265 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3266 /* off-process column indices */ 3267 count = 0; 3268 ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr); 3269 ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr); 3270 3271 ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr); 3272 ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr); 3273 for (i=0; i<Bn; i++) { 3274 if (PetscRealPart(xarray[i]) > -1.0) { 3275 idx[count] = i; /* local column index in off-diagonal part B */ 3276 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3277 count++; 3278 } 3279 } 3280 ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr); 3281 ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr); 3282 3283 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr); 3284 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3285 3286 ierr = PetscFree(idx);CHKERRQ(ierr); 3287 *garray = cmap1; 3288 3289 ierr = VecDestroy(&x);CHKERRQ(ierr); 3290 ierr = VecDestroy(&cmap);CHKERRQ(ierr); 3291 ierr = VecDestroy(&lcmap);CHKERRQ(ierr); 3292 PetscFunctionReturn(0); 3293 } 3294 3295 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3296 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat) 3297 { 3298 PetscErrorCode ierr; 3299 Mat_MPIAIJ *a = (Mat_MPIAIJ*)mat->data,*asub; 3300 Mat M = NULL; 3301 MPI_Comm comm; 3302 IS iscol_d,isrow_d,iscol_o; 3303 Mat Asub = NULL,Bsub = NULL; 3304 PetscInt n; 3305 3306 PetscFunctionBegin; 3307 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3308 3309 if (call == MAT_REUSE_MATRIX) { 3310 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3311 ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3312 if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse"); 3313 3314 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr); 3315 if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse"); 3316 3317 ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr); 3318 if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse"); 3319 3320 /* Update diagonal and off-diagonal portions of submat */ 3321 asub = (Mat_MPIAIJ*)(*submat)->data; 3322 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr); 3323 ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr); 3324 if (n) { 3325 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr); 3326 } 3327 ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3328 ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3329 3330 } else { /* call == MAT_INITIAL_MATRIX) */ 3331 const PetscInt *garray; 3332 PetscInt BsubN; 3333 3334 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3335 ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr); 3336 3337 /* Create local submatrices Asub and Bsub */ 3338 ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr); 3339 ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr); 3340 3341 /* Create submatrix M */ 3342 ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr); 3343 3344 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3345 asub = (Mat_MPIAIJ*)M->data; 3346 3347 ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr); 3348 n = asub->B->cmap->N; 3349 if (BsubN > n) { 3350 /* This case can be tested using ~petsc/src/tao/bound/examples/tutorials/runplate2_3 */ 3351 const PetscInt *idx; 3352 PetscInt i,j,*idx_new,*subgarray = asub->garray; 3353 ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr); 3354 3355 ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr); 3356 j = 0; 3357 ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr); 3358 for (i=0; i<n; i++) { 3359 if (j >= BsubN) break; 3360 while (subgarray[i] > garray[j]) j++; 3361 3362 if (subgarray[i] == garray[j]) { 3363 idx_new[i] = idx[j++]; 3364 } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot < garray[%D]=%D",i,subgarray[i],j,garray[j]); 3365 } 3366 ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr); 3367 3368 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3369 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr); 3370 3371 } else if (BsubN < n) { 3372 SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub cannot be smaller than B's",BsubN,asub->B->cmap->N); 3373 } 3374 3375 ierr = PetscFree(garray);CHKERRQ(ierr); 3376 *submat = M; 3377 3378 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3379 ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr); 3380 ierr = ISDestroy(&isrow_d);CHKERRQ(ierr); 3381 3382 ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr); 3383 ierr = ISDestroy(&iscol_d);CHKERRQ(ierr); 3384 3385 ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr); 3386 ierr = ISDestroy(&iscol_o);CHKERRQ(ierr); 3387 } 3388 PetscFunctionReturn(0); 3389 } 3390 3391 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat) 3392 { 3393 PetscErrorCode ierr; 3394 IS iscol_local=NULL,isrow_d; 3395 PetscInt csize; 3396 PetscInt n,i,j,start,end; 3397 PetscBool sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2]; 3398 MPI_Comm comm; 3399 3400 PetscFunctionBegin; 3401 /* If isrow has same processor distribution as mat, 3402 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3403 if (call == MAT_REUSE_MATRIX) { 3404 ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr); 3405 if (isrow_d) { 3406 sameRowDist = PETSC_TRUE; 3407 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3408 } else { 3409 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3410 if (iscol_local) { 3411 sameRowDist = PETSC_TRUE; 3412 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3413 } 3414 } 3415 } else { 3416 /* Check if isrow has same processor distribution as mat */ 3417 sameDist[0] 
= PETSC_FALSE; 3418 ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr); 3419 if (!n) { 3420 sameDist[0] = PETSC_TRUE; 3421 } else { 3422 ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr); 3423 ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr); 3424 if (i >= start && j < end) { 3425 sameDist[0] = PETSC_TRUE; 3426 } 3427 } 3428 3429 /* Check if iscol has same processor distribution as mat */ 3430 sameDist[1] = PETSC_FALSE; 3431 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3432 if (!n) { 3433 sameDist[1] = PETSC_TRUE; 3434 } else { 3435 ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr); 3436 ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr); 3437 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3438 } 3439 3440 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3441 ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr); 3442 sameRowDist = tsameDist[0]; 3443 } 3444 3445 if (sameRowDist) { 3446 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3447 /* isrow and iscol have same processor distribution as mat */ 3448 ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr); 3449 PetscFunctionReturn(0); 3450 } else { /* sameRowDist */ 3451 /* isrow has same processor distribution as mat */ 3452 if (call == MAT_INITIAL_MATRIX) { 3453 PetscBool sorted; 3454 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3455 ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */ 3456 ierr = ISGetSize(iscol,&i);CHKERRQ(ierr); 3457 if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %d != size of iscol %d",n,i); 3458 3459 ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr); 3460 if (sorted) { 3461 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3462 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 } else { /* call == MAT_REUSE_MATRIX */ 3466 IS iscol_sub; 3467 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3468 if (iscol_sub) { 3469 ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr); 3470 PetscFunctionReturn(0); 3471 } 3472 } 3473 } 3474 } 3475 3476 /* General case: iscol -> iscol_local which has global size of iscol */ 3477 if (call == MAT_REUSE_MATRIX) { 3478 ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr); 3479 if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3480 } else { 3481 if (!iscol_local) { 3482 ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr); 3483 } 3484 } 3485 3486 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3487 ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr); 3488 3489 if (call == MAT_INITIAL_MATRIX) { 3490 ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3491 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3492 } 3493 PetscFunctionReturn(0); 3494 } 3495 3496 /*@C 3497 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3498 and "off-diagonal" part of the matrix in CSR format. 3499 3500 Collective 3501 3502 Input Parameters: 3503 + comm - MPI communicator 3504 . 
A - "diagonal" portion of matrix 3505 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3506 - garray - global index of B columns 3507 3508 Output Parameter: 3509 . mat - the matrix, with input A as its local diagonal matrix 3510 Level: advanced 3511 3512 Notes: 3513 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3514 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 3515 3516 .seealso: MatCreateMPIAIJWithSplitArrays() 3517 @*/ 3518 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat) 3519 { 3520 PetscErrorCode ierr; 3521 Mat_MPIAIJ *maij; 3522 Mat_SeqAIJ *b=(Mat_SeqAIJ*)B->data,*bnew; 3523 PetscInt *oi=b->i,*oj=b->j,i,nz,col; 3524 PetscScalar *oa=b->a; 3525 Mat Bnew; 3526 PetscInt m,n,N; 3527 3528 PetscFunctionBegin; 3529 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3530 ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr); 3531 if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N); 3532 if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs); 3533 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3534 /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */ 3535 3536 /* Get global columns of mat */ 3537 ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3538 3539 ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr); 3540 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 3541 ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr); 3542 maij = (Mat_MPIAIJ*)(*mat)->data; 3543 3544 (*mat)->preallocated = PETSC_TRUE; 3545 3546 ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr); 3547 ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr); 3548 3549 /* Set A as diagonal portion of *mat */ 3550 maij->A = A; 3551 3552 nz = oi[m]; 3553 for (i=0; i<nz; i++) { 3554 col = oj[i]; 3555 oj[i] = garray[col]; 3556 } 3557 3558 /* Set Bnew as off-diagonal portion of *mat */ 3559 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,oa,&Bnew);CHKERRQ(ierr); 3560 bnew = (Mat_SeqAIJ*)Bnew->data; 3561 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3562 maij->B = Bnew; 3563 3564 if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,0,"BN %d != BnewN %d",B->rmap->N,Bnew->rmap->N); 3565 3566 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3567 b->free_a = PETSC_FALSE; 3568 b->free_ij = PETSC_FALSE; 3569 ierr = MatDestroy(&B);CHKERRQ(ierr); 3570 3571 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3572 bnew->free_a = PETSC_TRUE; 3573 bnew->free_ij = PETSC_TRUE; 3574 3575 /* condense columns of maij->B */ 3576 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 3577 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3578 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3579 ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr); 3580 ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3581 PetscFunctionReturn(0); 3582 } 3583 3584 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*); 3585 
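/*
   A minimal usage sketch for MatCreateMPIAIJWithSeqAIJ() above, assuming the caller has already
   built a sequential AIJ matrix Asub for the diagonal block, a sequential AIJ matrix Bsub for the
   off-diagonal block, and an array garray[] mapping the columns of Bsub to global column indices
   (Asub, Bsub, and garray are illustrative names, not defined in this file):

      Mat Asub,Bsub,C;
      ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&C);CHKERRQ(ierr);
      // Asub becomes the diagonal block of C and Bsub is destroyed by the call,
      // so neither Asub nor Bsub may be used by the caller afterwards.
*/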
3586 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat) 3587 { 3588 PetscErrorCode ierr; 3589 PetscInt i,m,n,rstart,row,rend,nz,j,bs,cbs; 3590 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3591 Mat_MPIAIJ *a=(Mat_MPIAIJ*)mat->data; 3592 Mat M,Msub,B=a->B; 3593 MatScalar *aa; 3594 Mat_SeqAIJ *aij; 3595 PetscInt *garray = a->garray,*colsub,Ncols; 3596 PetscInt count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend; 3597 IS iscol_sub,iscmap; 3598 const PetscInt *is_idx,*cmap; 3599 PetscBool allcolumns=PETSC_FALSE; 3600 MPI_Comm comm; 3601 3602 PetscFunctionBegin; 3603 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3604 3605 if (call == MAT_REUSE_MATRIX) { 3606 ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr); 3607 if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse"); 3608 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3609 3610 ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr); 3611 if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse"); 3612 3613 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr); 3614 if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3615 3616 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr); 3617 3618 } else { /* call == MAT_INITIAL_MATRIX) */ 3619 PetscBool flg; 3620 3621 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3622 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); 3623 3624 /* (1) iscol -> nonscalable iscol_local */ 3625 /* Check for special case: each processor gets entire matrix columns */ 3626 ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr); 3627 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3628 if (allcolumns) { 3629 iscol_sub = iscol_local; 3630 ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr); 3631 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr); 3632 3633 } else { 3634 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3635 PetscInt *idx,*cmap1,k; 3636 ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr); 3637 ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr); 3638 ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3639 count = 0; 3640 k = 0; 3641 for (i=0; i<Ncols; i++) { 3642 j = is_idx[i]; 3643 if (j >= cstart && j < cend) { 3644 /* diagonal part of mat */ 3645 idx[count] = j; 3646 cmap1[count++] = i; /* column index in submat */ 3647 } else if (Bn) { 3648 /* off-diagonal part of mat */ 3649 if (j == garray[k]) { 3650 idx[count] = j; 3651 cmap1[count++] = i; /* column index in submat */ 3652 } else if (j > garray[k]) { 3653 while (j > garray[k] && k < Bn-1) k++; 3654 if (j == garray[k]) { 3655 idx[count] = j; 3656 cmap1[count++] = i; /* column index in submat */ 3657 } 3658 } 3659 } 3660 } 3661 ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr); 3662 3663 ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr); 3664 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3665 ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr); 3666 3667 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr); 3668 } 3669 3670 /* (3) Create sequential Msub */ 3671 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr); 3672 } 3673 3674 ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr); 3675 aij = (Mat_SeqAIJ*)(Msub)->data; 3676 ii = aij->i; 3677 ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr); 3678 3679 /* 3680 m - number of local rows 3681 Ncols - number of columns (same on all processors) 3682 rstart - first row in new global matrix generated 3683 */ 3684 ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr); 3685 3686 if (call == MAT_INITIAL_MATRIX) { 3687 /* (4) Create parallel newmat */ 3688 PetscMPIInt rank,size; 3689 PetscInt csize; 3690 3691 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3692 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3693 3694 /* 3695 Determine the number of non-zeros in the diagonal and off-diagonal 3696 portions of the matrix in order to do correct preallocation 3697 */ 3698 3699 /* first get start and end of "diagonal" columns */ 3700 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 3701 if (csize == PETSC_DECIDE) { 3702 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3703 if (mglobal == Ncols) { /* square matrix */ 3704 nlocal = m; 3705 } else { 3706 nlocal = Ncols/size + ((Ncols % size) > rank); 3707 } 3708 } else { 3709 nlocal = csize; 3710 } 3711 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3712 rstart = rend - nlocal; 3713 if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols); 3714 3715 /* next, compute all the lengths */ 3716 jj = aij->j; 3717 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3718 olens = dlens + m; 3719 for (i=0; i<m; i++) { 3720 jend = ii[i+1] - ii[i]; 3721 olen = 0; 3722 dlen = 0; 3723 for (j=0; j<jend; j++) { 3724 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3725 else dlen++; 3726 jj++; 3727 } 3728 olens[i] = olen; 3729 dlens[i] = dlen; 3730 } 3731 3732 ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr); 3733 ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr); 3734 3735 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3736 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 
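    /* block sizes are taken from the row/column index sets; preallocation uses the dlens/olens counts computed above */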
3737 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3738 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3739 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3740 ierr = PetscFree(dlens);CHKERRQ(ierr); 3741 3742 } else { /* call == MAT_REUSE_MATRIX */ 3743 M = *newmat; 3744 ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr); 3745 if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3746 ierr = MatZeroEntries(M);CHKERRQ(ierr); 3747 /* 3748 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3749 rather than the slower MatSetValues(). 3750 */ 3751 M->was_assembled = PETSC_TRUE; 3752 M->assembled = PETSC_FALSE; 3753 } 3754 3755 /* (5) Set values of Msub to *newmat */ 3756 ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr); 3757 ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr); 3758 3759 jj = aij->j; 3760 aa = aij->a; 3761 for (i=0; i<m; i++) { 3762 row = rstart + i; 3763 nz = ii[i+1] - ii[i]; 3764 for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]]; 3765 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr); 3766 jj += nz; aa += nz; 3767 } 3768 ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr); 3769 3770 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3771 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3772 3773 ierr = PetscFree(colsub);CHKERRQ(ierr); 3774 3775 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3776 if (call == MAT_INITIAL_MATRIX) { 3777 *newmat = M; 3778 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr); 3779 ierr = MatDestroy(&Msub);CHKERRQ(ierr); 3780 3781 ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr); 3782 ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr); 3783 3784 ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr); 3785 ierr = ISDestroy(&iscmap);CHKERRQ(ierr); 3786 3787 if (iscol_local) { 3788 ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr); 3789 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 3790 } 3791 } 3792 PetscFunctionReturn(0); 3793 } 3794 3795 /* 3796 Not great since it makes two copies of the submatrix, first an SeqAIJ 3797 in local and then by concatenating the local matrices the end result. 3798 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3799 3800 Note: This requires a sequential iscol with all indices. 
3801 */ 3802 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat) 3803 { 3804 PetscErrorCode ierr; 3805 PetscMPIInt rank,size; 3806 PetscInt i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs; 3807 PetscInt *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal; 3808 Mat M,Mreuse; 3809 MatScalar *aa,*vwork; 3810 MPI_Comm comm; 3811 Mat_SeqAIJ *aij; 3812 PetscBool colflag,allcolumns=PETSC_FALSE; 3813 3814 PetscFunctionBegin; 3815 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 3816 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 3817 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3818 3819 /* Check for special case: each processor gets entire matrix columns */ 3820 ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr); 3821 ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr); 3822 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3823 3824 if (call == MAT_REUSE_MATRIX) { 3825 ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr); 3826 if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse"); 3827 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3828 } else { 3829 ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr); 3830 } 3831 3832 /* 3833 m - number of local rows 3834 n - number of columns (same on all processors) 3835 rstart - first row in new global matrix generated 3836 */ 3837 ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr); 3838 ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr); 3839 if (call == MAT_INITIAL_MATRIX) { 3840 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3841 ii = aij->i; 3842 jj = aij->j; 3843 3844 /* 3845 Determine the number of non-zeros in the diagonal and off-diagonal 3846 portions of the matrix in order to do correct preallocation 3847 */ 3848 3849 /* first get start and end of "diagonal" columns */ 3850 if (csize == PETSC_DECIDE) { 3851 ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr); 3852 if (mglobal == n) { /* square matrix */ 3853 nlocal = m; 3854 } else { 3855 nlocal = n/size + ((n % size) > rank); 3856 } 3857 } else { 3858 nlocal = csize; 3859 } 3860 ierr = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 3861 rstart = rend - nlocal; 3862 if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n); 3863 3864 /* next, compute all the lengths */ 3865 ierr = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr); 3866 olens = dlens + m; 3867 for (i=0; i<m; i++) { 3868 jend = ii[i+1] - ii[i]; 3869 olen = 0; 3870 dlen = 0; 3871 for (j=0; j<jend; j++) { 3872 if (*jj < rstart || *jj >= rend) olen++; 3873 else dlen++; 3874 jj++; 3875 } 3876 olens[i] = olen; 3877 dlens[i] = dlen; 3878 } 3879 ierr = MatCreate(comm,&M);CHKERRQ(ierr); 3880 ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr); 3881 ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); 3882 ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr); 3883 ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr); 3884 ierr = PetscFree(dlens);CHKERRQ(ierr); 3885 } else { 3886 PetscInt ml,nl; 3887 3888 M = *newmat; 3889 ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr); 3890 if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request"); 3891 ierr = 
MatZeroEntries(M);CHKERRQ(ierr); 3892 /* 3893 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3894 rather than the slower MatSetValues(). 3895 */ 3896 M->was_assembled = PETSC_TRUE; 3897 M->assembled = PETSC_FALSE; 3898 } 3899 ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr); 3900 aij = (Mat_SeqAIJ*)(Mreuse)->data; 3901 ii = aij->i; 3902 jj = aij->j; 3903 aa = aij->a; 3904 for (i=0; i<m; i++) { 3905 row = rstart + i; 3906 nz = ii[i+1] - ii[i]; 3907 cwork = jj; jj += nz; 3908 vwork = aa; aa += nz; 3909 ierr = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr); 3910 } 3911 3912 ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3913 ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3914 *newmat = M; 3915 3916 /* save submatrix used in processor for next request */ 3917 if (call == MAT_INITIAL_MATRIX) { 3918 ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr); 3919 ierr = MatDestroy(&Mreuse);CHKERRQ(ierr); 3920 } 3921 PetscFunctionReturn(0); 3922 } 3923 3924 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 3925 { 3926 PetscInt m,cstart, cend,j,nnz,i,d; 3927 PetscInt *d_nnz,*o_nnz,nnz_max = 0,rstart,ii; 3928 const PetscInt *JJ; 3929 PetscErrorCode ierr; 3930 PetscBool nooffprocentries; 3931 3932 PetscFunctionBegin; 3933 if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0 it is %D",Ii[0]); 3934 3935 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 3936 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 3937 m = B->rmap->n; 3938 cstart = B->cmap->rstart; 3939 cend = B->cmap->rend; 3940 rstart = B->rmap->rstart; 3941 3942 ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr); 3943 3944 #if defined(PETSC_USE_DEBUG) 3945 for (i=0; i<m; i++) { 3946 nnz = Ii[i+1]- Ii[i]; 3947 JJ = J + Ii[i]; 3948 if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative %D number of columns",i,nnz); 3949 if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index",i,JJ[0]); 3950 if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N); 3951 } 3952 #endif 3953 3954 for (i=0; i<m; i++) { 3955 nnz = Ii[i+1]- Ii[i]; 3956 JJ = J + Ii[i]; 3957 nnz_max = PetscMax(nnz_max,nnz); 3958 d = 0; 3959 for (j=0; j<nnz; j++) { 3960 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3961 } 3962 d_nnz[i] = d; 3963 o_nnz[i] = nnz - d; 3964 } 3965 ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr); 3966 ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr); 3967 3968 for (i=0; i<m; i++) { 3969 ii = i + rstart; 3970 ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr); 3971 } 3972 nooffprocentries = B->nooffprocentries; 3973 B->nooffprocentries = PETSC_TRUE; 3974 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3975 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3976 B->nooffprocentries = nooffprocentries; 3977 3978 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3979 PetscFunctionReturn(0); 3980 } 3981 3982 /*@ 3983 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format 3984 (the default parallel PETSc format). 
3985 3986 Collective 3987 3988 Input Parameters: 3989 + B - the matrix 3990 . i - the indices into j for the start of each local row (starts with zero) 3991 . j - the column indices for each local row (starts with zero) 3992 - v - optional values in the matrix 3993 3994 Level: developer 3995 3996 Notes: 3997 The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc; 3998 thus you CANNOT change the matrix entries by changing the values of v[] after you have 3999 called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays. 4000 4001 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 4002 4003 The format which is used for the sparse matrix input, is equivalent to a 4004 row-major ordering.. i.e for the following matrix, the input data expected is 4005 as shown 4006 4007 $ 1 0 0 4008 $ 2 0 3 P0 4009 $ ------- 4010 $ 4 5 6 P1 4011 $ 4012 $ Process0 [P0]: rows_owned=[0,1] 4013 $ i = {0,1,3} [size = nrow+1 = 2+1] 4014 $ j = {0,0,2} [size = 3] 4015 $ v = {1,2,3} [size = 3] 4016 $ 4017 $ Process1 [P1]: rows_owned=[2] 4018 $ i = {0,3} [size = nrow+1 = 1+1] 4019 $ j = {0,1,2} [size = 3] 4020 $ v = {4,5,6} [size = 3] 4021 4022 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ, 4023 MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays() 4024 @*/ 4025 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 4026 { 4027 PetscErrorCode ierr; 4028 4029 PetscFunctionBegin; 4030 ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr); 4031 PetscFunctionReturn(0); 4032 } 4033 4034 /*@C 4035 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format 4036 (the default parallel PETSc format). For good matrix assembly performance 4037 the user should preallocate the matrix storage by setting the parameters 4038 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4039 performance can be increased by more than a factor of 50. 4040 4041 Collective 4042 4043 Input Parameters: 4044 + B - the matrix 4045 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4046 (same value is used for all local rows) 4047 . d_nnz - array containing the number of nonzeros in the various rows of the 4048 DIAGONAL portion of the local submatrix (possibly different for each row) 4049 or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure. 4050 The size of this array is equal to the number of local rows, i.e 'm'. 4051 For matrices that will be factored, you must leave room for (and set) 4052 the diagonal entry even if it is zero. 4053 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4054 submatrix (same value is used for all local rows). 4055 - o_nnz - array containing the number of nonzeros in the various rows of the 4056 OFF-DIAGONAL portion of the local submatrix (possibly different for 4057 each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero 4058 structure. The size of this array is equal to the number 4059 of local rows, i.e 'm'. 

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)) is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2, etc., where m0,m1,m2,... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitutes the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2, i.e. we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all the above values, i.e. 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain, in standard
   CSR format, the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input is equivalent to a
   row-major ordering,
i.e for the following matrix, the input data expected is 4206 as shown 4207 4208 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4209 4210 $ 1 0 0 4211 $ 2 0 3 P0 4212 $ ------- 4213 $ 4 5 6 P1 4214 $ 4215 $ Process0 [P0]: rows_owned=[0,1] 4216 $ i = {0,1,3} [size = nrow+1 = 2+1] 4217 $ j = {0,0,2} [size = 3] 4218 $ v = {1,2,3} [size = 3] 4219 $ 4220 $ Process1 [P1]: rows_owned=[2] 4221 $ i = {0,3} [size = nrow+1 = 1+1] 4222 $ j = {0,1,2} [size = 3] 4223 $ v = {4,5,6} [size = 3] 4224 4225 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4226 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4227 @*/ 4228 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat) 4229 { 4230 PetscErrorCode ierr; 4231 4232 PetscFunctionBegin; 4233 if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4234 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4235 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 4236 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 4237 /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */ 4238 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 4239 ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr); 4240 PetscFunctionReturn(0); 4241 } 4242 4243 /*@ 4244 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4245 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical 4246 4247 Collective 4248 4249 Input Parameters: 4250 + mat - the matrix 4251 . m - number of local rows (Cannot be PETSC_DECIDE) 4252 . n - This value should be the same as the local size used in creating the 4253 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4254 calculated if N is given) For square matrices n is almost always m. 4255 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4256 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4257 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4258 . 
J - column indices 4259 - v - matrix values 4260 4261 Level: intermediate 4262 4263 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4264 MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays() 4265 @*/ 4266 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[]) 4267 { 4268 PetscErrorCode ierr; 4269 PetscInt cstart,nnz,i,j; 4270 PetscInt *ld; 4271 PetscBool nooffprocentries; 4272 Mat_MPIAIJ *Aij = (Mat_MPIAIJ*)mat->data; 4273 Mat_SeqAIJ *Ad = (Mat_SeqAIJ*)Aij->A->data, *Ao = (Mat_SeqAIJ*)Aij->B->data; 4274 PetscScalar *ad = Ad->a, *ao = Ao->a; 4275 const PetscInt *Adi = Ad->i; 4276 PetscInt ldi,Iii,md; 4277 4278 PetscFunctionBegin; 4279 if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 4280 if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4281 if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4282 if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4283 4284 cstart = mat->cmap->rstart; 4285 if (!Aij->ld) { 4286 /* count number of entries below block diagonal */ 4287 ierr = PetscCalloc1(m,&ld);CHKERRQ(ierr); 4288 Aij->ld = ld; 4289 for (i=0; i<m; i++) { 4290 nnz = Ii[i+1]- Ii[i]; 4291 j = 0; 4292 while (J[j] < cstart && j < nnz) {j++;} 4293 J += nnz; 4294 ld[i] = j; 4295 } 4296 } else { 4297 ld = Aij->ld; 4298 } 4299 4300 for (i=0; i<m; i++) { 4301 nnz = Ii[i+1]- Ii[i]; 4302 Iii = Ii[i]; 4303 ldi = ld[i]; 4304 md = Adi[i+1]-Adi[i]; 4305 ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr); 4306 ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr); 4307 ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr); 4308 ad += md; 4309 ao += nnz - md; 4310 } 4311 nooffprocentries = mat->nooffprocentries; 4312 mat->nooffprocentries = PETSC_TRUE; 4313 ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr); 4314 ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr); 4315 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 4316 ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4317 ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4318 mat->nooffprocentries = nooffprocentries; 4319 PetscFunctionReturn(0); 4320 } 4321 4322 /*@C 4323 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4324 (the default parallel PETSc format). For good matrix assembly performance 4325 the user should preallocate the matrix storage by setting the parameters 4326 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4327 performance can be increased by more than a factor of 50. 4328 4329 Collective 4330 4331 Input Parameters: 4332 + comm - MPI communicator 4333 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4334 This value should be the same as the local size used in creating the 4335 y vector for the matrix-vector product y = Ax. 4336 . n - This value should be the same as the local size used in creating the 4337 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4338 calculated if N is given) For square matrices n is almost always m. 4339 . 
M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL, if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e. 'm'.
.  o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL, if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e. 'm'.

   Output Parameter:
.  A - the matrix

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored.

   The m, n, M, N parameters specify the size of the matrix and its partitioning across
   processors, while the d_nz, d_nnz, o_nz, o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2, etc., where
   m0, m1, m2, ... are the values of the input parameter 'm'; i.e., each processor
   stores values corresponding to an [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to the 0th partition, the next n1 columns belonging to the next
   partition, etc., where n0, n1, n2, ... are the values of the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns (of sizes m and n)
   owned by that processor; i.e., the diagonal submatrix on process 0 is
   [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1], etc.
   The remaining portion of the local submatrix, of size [m x (N-n)],
   constitutes the OFF-DIAGONAL portion. The example below
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz and d_nnz are specified, then o_nz and d_nz are ignored.

   When calling this routine with a single process communicator, a matrix of
   type SEQAIJ is returned.
If a matrix of type MPIAIJ is desired for this
   type of communicator, use the construction mechanism
.vb
   MatCreate(...,&A);
   MatSetType(A,MATMPIAIJ);
   MatSetSizes(A, m,n,M,N);
   MatMPIAIJSetPreallocation(A,...);
.ve

   By default, this format uses inodes (identical nodes) when possible.
   We search for consecutive rows with the same nonzero structure, thereby
   reusing matrix information to achieve increased efficiency.

   Options Database Keys:
+  -mat_no_inode - Do not use inodes
-  -mat_inode_limit <limit> - Sets inode limit (max limit=5)

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values that is
   assembled across 3 processors. Let us assume that proc0 owns 3 rows,
   proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, and G,H,I are owned by proc2.

   The 'm' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'n' parameters for proc0, proc1, proc2 are 3, 3, 2 respectively.
   The 'M','N' parameters are 8, 8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0, proc1, proc2 are
   the submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0, proc1, proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL
   part as separate SeqAIJ matrices; e.g., proc1 will store [E] as one SeqAIJ
   matrix and [DF] as another SeqAIJ matrix.

   When the d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local DIAGONAL submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
   the local rows of each of the local DIAGONAL and OFF-DIAGONAL submatrices.
   In this case, the values of d_nz, o_nz are
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10
   for proc2; i.e., we are using 12+15+10=37 storage locations to store
   34 values.

   When the d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both the DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz, o_nnz are
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is the sum of all of the above values, i.e. 34, and
   hence the preallocation is perfect.
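   As a sketch of the corresponding call for the d_nnz/o_nnz case above (assuming the communicator is
   PETSC_COMM_WORLD, a Mat variable A has been declared, and error checking is omitted), each process passes
   its own local sizes and arrays; for example, proc1 would call
.vb
     PetscInt d_nnz[3] = {3,3,2}, o_nnz[3] = {2,1,1};
     MatCreateAIJ(PETSC_COMM_WORLD,3,3,8,8,0,d_nnz,0,o_nnz,&A);
.ve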
4491 4492 Level: intermediate 4493 4494 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(), 4495 MATMPIAIJ, MatCreateMPIAIJWithArrays() 4496 @*/ 4497 PetscErrorCode MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A) 4498 { 4499 PetscErrorCode ierr; 4500 PetscMPIInt size; 4501 4502 PetscFunctionBegin; 4503 ierr = MatCreate(comm,A);CHKERRQ(ierr); 4504 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 4505 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4506 if (size > 1) { 4507 ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr); 4508 ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr); 4509 } else { 4510 ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr); 4511 ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr); 4512 } 4513 PetscFunctionReturn(0); 4514 } 4515 4516 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[]) 4517 { 4518 Mat_MPIAIJ *a = (Mat_MPIAIJ*)A->data; 4519 PetscBool flg; 4520 PetscErrorCode ierr; 4521 4522 PetscFunctionBegin; 4523 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr); 4524 if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input"); 4525 if (Ad) *Ad = a->A; 4526 if (Ao) *Ao = a->B; 4527 if (colmap) *colmap = a->garray; 4528 PetscFunctionReturn(0); 4529 } 4530 4531 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 4532 { 4533 PetscErrorCode ierr; 4534 PetscInt m,N,i,rstart,nnz,Ii; 4535 PetscInt *indx; 4536 PetscScalar *values; 4537 4538 PetscFunctionBegin; 4539 ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr); 4540 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4541 PetscInt *dnz,*onz,sum,bs,cbs; 4542 4543 if (n == PETSC_DECIDE) { 4544 ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr); 4545 } 4546 /* Check sum(n) = N */ 4547 ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4548 if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N); 4549 4550 ierr = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr); 4551 rstart -= m; 4552 4553 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4554 for (i=0; i<m; i++) { 4555 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4556 ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr); 4557 ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr); 4558 } 4559 4560 ierr = MatCreate(comm,outmat);CHKERRQ(ierr); 4561 ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4562 ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr); 4563 ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr); 4564 ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr); 4565 ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr); 4566 ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr); 4567 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 4568 } 4569 4570 /* numeric phase */ 4571 ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr); 4572 for (i=0; i<m; i++) { 4573 ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4574 Ii = i + rstart; 4575 ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4576 ierr = 
MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr); 4577 } 4578 ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4579 ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4580 PetscFunctionReturn(0); 4581 } 4582 4583 PetscErrorCode MatFileSplit(Mat A,char *outfile) 4584 { 4585 PetscErrorCode ierr; 4586 PetscMPIInt rank; 4587 PetscInt m,N,i,rstart,nnz; 4588 size_t len; 4589 const PetscInt *indx; 4590 PetscViewer out; 4591 char *name; 4592 Mat B; 4593 const PetscScalar *values; 4594 4595 PetscFunctionBegin; 4596 ierr = MatGetLocalSize(A,&m,0);CHKERRQ(ierr); 4597 ierr = MatGetSize(A,0,&N);CHKERRQ(ierr); 4598 /* Should this be the type of the diagonal block of A? */ 4599 ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr); 4600 ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr); 4601 ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr); 4602 ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr); 4603 ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr); 4604 ierr = MatGetOwnershipRange(A,&rstart,0);CHKERRQ(ierr); 4605 for (i=0; i<m; i++) { 4606 ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4607 ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr); 4608 ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr); 4609 } 4610 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4611 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4612 4613 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 4614 ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr); 4615 ierr = PetscMalloc1(len+5,&name);CHKERRQ(ierr); 4616 sprintf(name,"%s.%d",outfile,rank); 4617 ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr); 4618 ierr = PetscFree(name);CHKERRQ(ierr); 4619 ierr = MatView(B,out);CHKERRQ(ierr); 4620 ierr = PetscViewerDestroy(&out);CHKERRQ(ierr); 4621 ierr = MatDestroy(&B);CHKERRQ(ierr); 4622 PetscFunctionReturn(0); 4623 } 4624 4625 PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(Mat A) 4626 { 4627 PetscErrorCode ierr; 4628 Mat_Merge_SeqsToMPI *merge; 4629 PetscContainer container; 4630 4631 PetscFunctionBegin; 4632 ierr = PetscObjectQuery((PetscObject)A,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4633 if (container) { 4634 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4635 ierr = PetscFree(merge->id_r);CHKERRQ(ierr); 4636 ierr = PetscFree(merge->len_s);CHKERRQ(ierr); 4637 ierr = PetscFree(merge->len_r);CHKERRQ(ierr); 4638 ierr = PetscFree(merge->bi);CHKERRQ(ierr); 4639 ierr = PetscFree(merge->bj);CHKERRQ(ierr); 4640 ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr); 4641 ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr); 4642 ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr); 4643 ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr); 4644 ierr = PetscFree(merge->coi);CHKERRQ(ierr); 4645 ierr = PetscFree(merge->coj);CHKERRQ(ierr); 4646 ierr = PetscFree(merge->owners_co);CHKERRQ(ierr); 4647 ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr); 4648 ierr = PetscFree(merge);CHKERRQ(ierr); 4649 ierr = PetscObjectCompose((PetscObject)A,"MatMergeSeqsToMPI",0);CHKERRQ(ierr); 4650 } 4651 ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr); 4652 PetscFunctionReturn(0); 4653 } 4654 4655 #include <../src/mat/utils/freespace.h> 4656 #include <petscbt.h> 4657 4658 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat) 4659 { 4660 PetscErrorCode ierr; 4661 MPI_Comm comm; 4662 Mat_SeqAIJ *a =(Mat_SeqAIJ*)seqmat->data; 4663 PetscMPIInt 
size,rank,taga,*len_s; 4664 PetscInt N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj; 4665 PetscInt proc,m; 4666 PetscInt **buf_ri,**buf_rj; 4667 PetscInt k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj; 4668 PetscInt nrows,**buf_ri_k,**nextrow,**nextai; 4669 MPI_Request *s_waits,*r_waits; 4670 MPI_Status *status; 4671 MatScalar *aa=a->a; 4672 MatScalar **abuf_r,*ba_i; 4673 Mat_Merge_SeqsToMPI *merge; 4674 PetscContainer container; 4675 4676 PetscFunctionBegin; 4677 ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr); 4678 ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4679 4680 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4681 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4682 4683 ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr); 4684 ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr); 4685 4686 bi = merge->bi; 4687 bj = merge->bj; 4688 buf_ri = merge->buf_ri; 4689 buf_rj = merge->buf_rj; 4690 4691 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4692 owners = merge->rowmap->range; 4693 len_s = merge->len_s; 4694 4695 /* send and recv matrix values */ 4696 /*-----------------------------*/ 4697 ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr); 4698 ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr); 4699 4700 ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr); 4701 for (proc=0,k=0; proc<size; proc++) { 4702 if (!len_s[proc]) continue; 4703 i = owners[proc]; 4704 ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRQ(ierr); 4705 k++; 4706 } 4707 4708 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRQ(ierr);} 4709 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRQ(ierr);} 4710 ierr = PetscFree(status);CHKERRQ(ierr); 4711 4712 ierr = PetscFree(s_waits);CHKERRQ(ierr); 4713 ierr = PetscFree(r_waits);CHKERRQ(ierr); 4714 4715 /* insert mat values of mpimat */ 4716 /*----------------------------*/ 4717 ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr); 4718 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4719 4720 for (k=0; k<merge->nrecv; k++) { 4721 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4722 nrows = *(buf_ri_k[k]); 4723 nextrow[k] = buf_ri_k[k]+1; /* next row number of k-th recved i-structure */ 4724 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4725 } 4726 4727 /* set values of ba */ 4728 m = merge->rowmap->n; 4729 for (i=0; i<m; i++) { 4730 arow = owners[rank] + i; 4731 bj_i = bj+bi[i]; /* col indices of the i-th row of mpimat */ 4732 bnzi = bi[i+1] - bi[i]; 4733 ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr); 4734 4735 /* add local non-zero vals of this proc's seqmat into ba */ 4736 anzi = ai[arow+1] - ai[arow]; 4737 aj = a->j + ai[arow]; 4738 aa = a->a + ai[arow]; 4739 nextaj = 0; 4740 for (j=0; nextaj<anzi; j++) { 4741 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4742 ba_i[j] += aa[nextaj++]; 4743 } 4744 } 4745 4746 /* add received vals into ba */ 4747 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4748 /* i-th row */ 4749 if (i == *nextrow[k]) { 4750 anzi = *(nextai[k]+1) - *nextai[k]; 4751 aj = buf_rj[k] + *(nextai[k]); 4752 aa = abuf_r[k] + *(nextai[k]); 4753 nextaj = 0; 4754 for (j=0; nextaj<anzi; j++) { 4755 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4756 
ba_i[j] += aa[nextaj++]; 4757 } 4758 } 4759 nextrow[k]++; nextai[k]++; 4760 } 4761 } 4762 ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr); 4763 } 4764 ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4765 ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 4766 4767 ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr); 4768 ierr = PetscFree(abuf_r);CHKERRQ(ierr); 4769 ierr = PetscFree(ba_i);CHKERRQ(ierr); 4770 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4771 ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr); 4772 PetscFunctionReturn(0); 4773 } 4774 4775 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat) 4776 { 4777 PetscErrorCode ierr; 4778 Mat B_mpi; 4779 Mat_SeqAIJ *a=(Mat_SeqAIJ*)seqmat->data; 4780 PetscMPIInt size,rank,tagi,tagj,*len_s,*len_si,*len_ri; 4781 PetscInt **buf_rj,**buf_ri,**buf_ri_k; 4782 PetscInt M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j; 4783 PetscInt len,proc,*dnz,*onz,bs,cbs; 4784 PetscInt k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0; 4785 PetscInt nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai; 4786 MPI_Request *si_waits,*sj_waits,*ri_waits,*rj_waits; 4787 MPI_Status *status; 4788 PetscFreeSpaceList free_space=NULL,current_space=NULL; 4789 PetscBT lnkbt; 4790 Mat_Merge_SeqsToMPI *merge; 4791 PetscContainer container; 4792 4793 PetscFunctionBegin; 4794 ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 4795 4796 /* make sure it is a PETSc comm */ 4797 ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr); 4798 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 4799 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 4800 4801 ierr = PetscNew(&merge);CHKERRQ(ierr); 4802 ierr = PetscMalloc1(size,&status);CHKERRQ(ierr); 4803 4804 /* determine row ownership */ 4805 /*---------------------------------------------------------*/ 4806 ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr); 4807 ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr); 4808 ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr); 4809 ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr); 4810 ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr); 4811 ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr); 4812 ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr); 4813 4814 m = merge->rowmap->n; 4815 owners = merge->rowmap->range; 4816 4817 /* determine the number of messages to send, their lengths */ 4818 /*---------------------------------------------------------*/ 4819 len_s = merge->len_s; 4820 4821 len = 0; /* length of buf_si[] */ 4822 merge->nsend = 0; 4823 for (proc=0; proc<size; proc++) { 4824 len_si[proc] = 0; 4825 if (proc == rank) { 4826 len_s[proc] = 0; 4827 } else { 4828 len_si[proc] = owners[proc+1] - owners[proc] + 1; 4829 len_s[proc] = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4830 } 4831 if (len_s[proc]) { 4832 merge->nsend++; 4833 nrows = 0; 4834 for (i=owners[proc]; i<owners[proc+1]; i++) { 4835 if (ai[i+1] > ai[i]) nrows++; 4836 } 4837 len_si[proc] = 2*(nrows+1); 4838 len += len_si[proc]; 4839 } 4840 } 4841 4842 /* determine the number and length of messages to receive for ij-structure */ 4843 /*-------------------------------------------------------------------------*/ 4844 ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr); 4845 ierr = 
PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr); 4846 4847 /* post the Irecv of j-structure */ 4848 /*-------------------------------*/ 4849 ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr); 4850 ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr); 4851 4852 /* post the Isend of j-structure */ 4853 /*--------------------------------*/ 4854 ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr); 4855 4856 for (proc=0, k=0; proc<size; proc++) { 4857 if (!len_s[proc]) continue; 4858 i = owners[proc]; 4859 ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRQ(ierr); 4860 k++; 4861 } 4862 4863 /* receives and sends of j-structure are complete */ 4864 /*------------------------------------------------*/ 4865 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRQ(ierr);} 4866 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRQ(ierr);} 4867 4868 /* send and recv i-structure */ 4869 /*---------------------------*/ 4870 ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr); 4871 ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr); 4872 4873 ierr = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr); 4874 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4875 for (proc=0,k=0; proc<size; proc++) { 4876 if (!len_s[proc]) continue; 4877 /* form outgoing message for i-structure: 4878 buf_si[0]: nrows to be sent 4879 [1:nrows]: row index (global) 4880 [nrows+1:2*nrows+1]: i-structure index 4881 */ 4882 /*-------------------------------------------*/ 4883 nrows = len_si[proc]/2 - 1; 4884 buf_si_i = buf_si + nrows+1; 4885 buf_si[0] = nrows; 4886 buf_si_i[0] = 0; 4887 nrows = 0; 4888 for (i=owners[proc]; i<owners[proc+1]; i++) { 4889 anzi = ai[i+1] - ai[i]; 4890 if (anzi) { 4891 buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */ 4892 buf_si[nrows+1] = i-owners[proc]; /* local row index */ 4893 nrows++; 4894 } 4895 } 4896 ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRQ(ierr); 4897 k++; 4898 buf_si += len_si[proc]; 4899 } 4900 4901 if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRQ(ierr);} 4902 if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRQ(ierr);} 4903 4904 ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr); 4905 for (i=0; i<merge->nrecv; i++) { 4906 ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr); 4907 } 4908 4909 ierr = PetscFree(len_si);CHKERRQ(ierr); 4910 ierr = PetscFree(len_ri);CHKERRQ(ierr); 4911 ierr = PetscFree(rj_waits);CHKERRQ(ierr); 4912 ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr); 4913 ierr = PetscFree(ri_waits);CHKERRQ(ierr); 4914 ierr = PetscFree(buf_s);CHKERRQ(ierr); 4915 ierr = PetscFree(status);CHKERRQ(ierr); 4916 4917 /* compute a local seq matrix in each processor */ 4918 /*----------------------------------------------*/ 4919 /* allocate bi array and free space for accumulating nonzero column info */ 4920 ierr = PetscMalloc1(m+1,&bi);CHKERRQ(ierr); 4921 bi[0] = 0; 4922 4923 /* create and initialize a linked list */ 4924 nlnk = N+1; 4925 ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4926 4927 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4928 len = ai[owners[rank+1]] - 
ai[owners[rank]]; 4929 ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr); 4930 4931 current_space = free_space; 4932 4933 /* determine symbolic info for each local row */ 4934 ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr); 4935 4936 for (k=0; k<merge->nrecv; k++) { 4937 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4938 nrows = *buf_ri_k[k]; 4939 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4940 nextai[k] = buf_ri_k[k] + (nrows + 1); /* poins to the next i-structure of k-th recved i-structure */ 4941 } 4942 4943 ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr); 4944 len = 0; 4945 for (i=0; i<m; i++) { 4946 bnzi = 0; 4947 /* add local non-zero cols of this proc's seqmat into lnk */ 4948 arow = owners[rank] + i; 4949 anzi = ai[arow+1] - ai[arow]; 4950 aj = a->j + ai[arow]; 4951 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4952 bnzi += nlnk; 4953 /* add received col data into lnk */ 4954 for (k=0; k<merge->nrecv; k++) { /* k-th received message */ 4955 if (i == *nextrow[k]) { /* i-th row */ 4956 anzi = *(nextai[k]+1) - *nextai[k]; 4957 aj = buf_rj[k] + *nextai[k]; 4958 ierr = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr); 4959 bnzi += nlnk; 4960 nextrow[k]++; nextai[k]++; 4961 } 4962 } 4963 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4964 4965 /* if free space is not available, make more free space */ 4966 if (current_space->local_remaining<bnzi) { 4967 ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),¤t_space);CHKERRQ(ierr); 4968 nspacedouble++; 4969 } 4970 /* copy data into free space, then initialize lnk */ 4971 ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr); 4972 ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr); 4973 4974 current_space->array += bnzi; 4975 current_space->local_used += bnzi; 4976 current_space->local_remaining -= bnzi; 4977 4978 bi[i+1] = bi[i] + bnzi; 4979 } 4980 4981 ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr); 4982 4983 ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr); 4984 ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr); 4985 ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr); 4986 4987 /* create symbolic parallel matrix B_mpi */ 4988 /*---------------------------------------*/ 4989 ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr); 4990 ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr); 4991 if (n==PETSC_DECIDE) { 4992 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr); 4993 } else { 4994 ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4995 } 4996 ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr); 4997 ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr); 4998 ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr); 4999 ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr); 5000 ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 5001 5002 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5003 B_mpi->assembled = PETSC_FALSE; 5004 B_mpi->ops->destroy = MatDestroy_MPIAIJ_SeqsToMPI; 5005 merge->bi = bi; 5006 merge->bj = bj; 5007 merge->buf_ri = buf_ri; 5008 merge->buf_rj = buf_rj; 5009 merge->coi = NULL; 5010 merge->coj = NULL; 5011 merge->owners_co = NULL; 5012 5013 ierr = PetscCommDestroy(&comm);CHKERRQ(ierr); 5014 5015 /* attach the 
supporting struct to B_mpi for reuse */ 5016 ierr = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr); 5017 ierr = PetscContainerSetPointer(container,merge);CHKERRQ(ierr); 5018 ierr = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr); 5019 ierr = PetscContainerDestroy(&container);CHKERRQ(ierr); 5020 *mpimat = B_mpi; 5021 5022 ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr); 5023 PetscFunctionReturn(0); 5024 } 5025 5026 /*@C 5027 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 5028 matrices from each processor 5029 5030 Collective 5031 5032 Input Parameters: 5033 + comm - the communicators the parallel matrix will live on 5034 . seqmat - the input sequential matrices 5035 . m - number of local rows (or PETSC_DECIDE) 5036 . n - number of local columns (or PETSC_DECIDE) 5037 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5038 5039 Output Parameter: 5040 . mpimat - the parallel matrix generated 5041 5042 Level: advanced 5043 5044 Notes: 5045 The dimensions of the sequential matrix in each processor MUST be the same. 5046 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5047 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 5048 @*/ 5049 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat) 5050 { 5051 PetscErrorCode ierr; 5052 PetscMPIInt size; 5053 5054 PetscFunctionBegin; 5055 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5056 if (size == 1) { 5057 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5058 if (scall == MAT_INITIAL_MATRIX) { 5059 ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr); 5060 } else { 5061 ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 5062 } 5063 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5064 PetscFunctionReturn(0); 5065 } 5066 ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5067 if (scall == MAT_INITIAL_MATRIX) { 5068 ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr); 5069 } 5070 ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr); 5071 ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr); 5072 PetscFunctionReturn(0); 5073 } 5074 5075 /*@ 5076 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5077 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5078 with MatGetSize() 5079 5080 Not Collective 5081 5082 Input Parameters: 5083 + A - the matrix 5084 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5085 5086 Output Parameter: 5087 . 
A_loc - the local sequential matrix generated 5088 5089 Level: developer 5090 5091 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed() 5092 5093 @*/ 5094 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc) 5095 { 5096 PetscErrorCode ierr; 5097 Mat_MPIAIJ *mpimat=(Mat_MPIAIJ*)A->data; 5098 Mat_SeqAIJ *mat,*a,*b; 5099 PetscInt *ai,*aj,*bi,*bj,*cmap=mpimat->garray; 5100 MatScalar *aa,*ba,*cam; 5101 PetscScalar *ca; 5102 PetscInt am=A->rmap->n,i,j,k,cstart=A->cmap->rstart; 5103 PetscInt *ci,*cj,col,ncols_d,ncols_o,jo; 5104 PetscBool match; 5105 MPI_Comm comm; 5106 PetscMPIInt size; 5107 5108 PetscFunctionBegin; 5109 ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr); 5110 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5111 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5112 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5113 if (size == 1 && scall == MAT_REUSE_MATRIX) PetscFunctionReturn(0); 5114 5115 ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5116 a = (Mat_SeqAIJ*)(mpimat->A)->data; 5117 b = (Mat_SeqAIJ*)(mpimat->B)->data; 5118 ai = a->i; aj = a->j; bi = b->i; bj = b->j; 5119 aa = a->a; ba = b->a; 5120 if (scall == MAT_INITIAL_MATRIX) { 5121 if (size == 1) { 5122 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ai,aj,aa,A_loc);CHKERRQ(ierr); 5123 PetscFunctionReturn(0); 5124 } 5125 5126 ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr); 5127 ci[0] = 0; 5128 for (i=0; i<am; i++) { 5129 ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]); 5130 } 5131 ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr); 5132 ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr); 5133 k = 0; 5134 for (i=0; i<am; i++) { 5135 ncols_o = bi[i+1] - bi[i]; 5136 ncols_d = ai[i+1] - ai[i]; 5137 /* off-diagonal portion of A */ 5138 for (jo=0; jo<ncols_o; jo++) { 5139 col = cmap[*bj]; 5140 if (col >= cstart) break; 5141 cj[k] = col; bj++; 5142 ca[k++] = *ba++; 5143 } 5144 /* diagonal portion of A */ 5145 for (j=0; j<ncols_d; j++) { 5146 cj[k] = cstart + *aj++; 5147 ca[k++] = *aa++; 5148 } 5149 /* off-diagonal portion of A */ 5150 for (j=jo; j<ncols_o; j++) { 5151 cj[k] = cmap[*bj++]; 5152 ca[k++] = *ba++; 5153 } 5154 } 5155 /* put together the new matrix */ 5156 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr); 5157 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5158 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5159 mat = (Mat_SeqAIJ*)(*A_loc)->data; 5160 mat->free_a = PETSC_TRUE; 5161 mat->free_ij = PETSC_TRUE; 5162 mat->nonew = 0; 5163 } else if (scall == MAT_REUSE_MATRIX) { 5164 mat=(Mat_SeqAIJ*)(*A_loc)->data; 5165 ci = mat->i; cj = mat->j; cam = mat->a; 5166 for (i=0; i<am; i++) { 5167 /* off-diagonal portion of A */ 5168 ncols_o = bi[i+1] - bi[i]; 5169 for (jo=0; jo<ncols_o; jo++) { 5170 col = cmap[*bj]; 5171 if (col >= cstart) break; 5172 *cam++ = *ba++; bj++; 5173 } 5174 /* diagonal portion of A */ 5175 ncols_d = ai[i+1] - ai[i]; 5176 for (j=0; j<ncols_d; j++) *cam++ = *aa++; 5177 /* off-diagonal portion of A */ 5178 for (j=jo; j<ncols_o; j++) { 5179 *cam++ = *ba++; bj++; 5180 } 5181 } 5182 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall); 5183 ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr); 5184 PetscFunctionReturn(0); 5185 } 5186 5187 /*@C 5188 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5189 5190 Not Collective 5191 5192 Input Parameters: 5193 + A - the matrix 5194 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5195 - row, col - index sets of rows and columns to extract (or NULL) 5196 5197 Output Parameter: 5198 . A_loc - the local sequential matrix generated 5199 5200 Level: developer 5201 5202 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat() 5203 5204 @*/ 5205 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc) 5206 { 5207 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5208 PetscErrorCode ierr; 5209 PetscInt i,start,end,ncols,nzA,nzB,*cmap,imark,*idx; 5210 IS isrowa,iscola; 5211 Mat *aloc; 5212 PetscBool match; 5213 5214 PetscFunctionBegin; 5215 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr); 5216 if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input"); 5217 ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5218 if (!row) { 5219 start = A->rmap->rstart; end = A->rmap->rend; 5220 ierr = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr); 5221 } else { 5222 isrowa = *row; 5223 } 5224 if (!col) { 5225 start = A->cmap->rstart; 5226 cmap = a->garray; 5227 nzA = a->A->cmap->n; 5228 nzB = a->B->cmap->n; 5229 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5230 ncols = 0; 5231 for (i=0; i<nzB; i++) { 5232 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5233 else break; 5234 } 5235 imark = i; 5236 for (i=0; i<nzA; i++) idx[ncols++] = start + i; 5237 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; 5238 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr); 5239 } else { 5240 iscola = *col; 5241 } 5242 if (scall != MAT_INITIAL_MATRIX) { 5243 ierr = PetscMalloc1(1,&aloc);CHKERRQ(ierr); 5244 aloc[0] = *A_loc; 5245 } 5246 ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr); 5247 if (!col) { /* attach global id of condensed columns */ 5248 ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr); 5249 } 5250 *A_loc = aloc[0]; 5251 ierr = PetscFree(aloc);CHKERRQ(ierr); 5252 if (!row) { 5253 ierr = ISDestroy(&isrowa);CHKERRQ(ierr); 5254 } 5255 if (!col) { 5256 ierr = ISDestroy(&iscola);CHKERRQ(ierr); 5257 } 5258 ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr); 5259 PetscFunctionReturn(0); 5260 } 5261 5262 /*@C 5263 MatGetBrowsOfAcols - Creates a 
SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5264 5265 Collective on Mat 5266 5267 Input Parameters: 5268 + A,B - the matrices in mpiaij format 5269 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5270 - rowb, colb - index sets of rows and columns of B to extract (or NULL) 5271 5272 Output Parameter: 5273 + rowb, colb - index sets of rows and columns of B to extract 5274 - B_seq - the sequential matrix generated 5275 5276 Level: developer 5277 5278 @*/ 5279 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq) 5280 { 5281 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5282 PetscErrorCode ierr; 5283 PetscInt *idx,i,start,ncols,nzA,nzB,*cmap,imark; 5284 IS isrowb,iscolb; 5285 Mat *bseq=NULL; 5286 5287 PetscFunctionBegin; 5288 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5289 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5290 } 5291 ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5292 5293 if (scall == MAT_INITIAL_MATRIX) { 5294 start = A->cmap->rstart; 5295 cmap = a->garray; 5296 nzA = a->A->cmap->n; 5297 nzB = a->B->cmap->n; 5298 ierr = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr); 5299 ncols = 0; 5300 for (i=0; i<nzB; i++) { /* row < local row index */ 5301 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5302 else break; 5303 } 5304 imark = i; 5305 for (i=0; i<nzA; i++) idx[ncols++] = start + i; /* local rows */ 5306 for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5307 ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr); 5308 ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr); 5309 } else { 5310 if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5311 isrowb = *rowb; iscolb = *colb; 5312 ierr = PetscMalloc1(1,&bseq);CHKERRQ(ierr); 5313 bseq[0] = *B_seq; 5314 } 5315 ierr = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr); 5316 *B_seq = bseq[0]; 5317 ierr = PetscFree(bseq);CHKERRQ(ierr); 5318 if (!rowb) { 5319 ierr = ISDestroy(&isrowb);CHKERRQ(ierr); 5320 } else { 5321 *rowb = isrowb; 5322 } 5323 if (!colb) { 5324 ierr = ISDestroy(&iscolb);CHKERRQ(ierr); 5325 } else { 5326 *colb = iscolb; 5327 } 5328 ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr); 5329 PetscFunctionReturn(0); 5330 } 5331 5332 /* 5333 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5334 of the OFF-DIAGONAL portion of local A 5335 5336 Collective on Mat 5337 5338 Input Parameters: 5339 + A,B - the matrices in mpiaij format 5340 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5341 5342 Output Parameter: 5343 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5344 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5345 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5346 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5347 5348 Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product 5349 for this matrix. This is not desirable.. 
5350 5351 Level: developer 5352 5353 */ 5354 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth) 5355 { 5356 PetscErrorCode ierr; 5357 Mat_MPIAIJ *a=(Mat_MPIAIJ*)A->data; 5358 Mat_SeqAIJ *b_oth; 5359 VecScatter ctx; 5360 MPI_Comm comm; 5361 const PetscMPIInt *rprocs,*sprocs; 5362 const PetscInt *srow,*rstarts,*sstarts; 5363 PetscInt *rowlen,*bufj,*bufJ,ncols,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs; 5364 PetscInt i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = 0,*sstartsj,len; 5365 PetscScalar *b_otha,*bufa,*bufA,*vals; 5366 MPI_Request *rwaits = NULL,*swaits = NULL; 5367 MPI_Status rstatus; 5368 PetscMPIInt jj,size,tag,rank,nsends_mpi,nrecvs_mpi; 5369 5370 PetscFunctionBegin; 5371 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 5372 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 5373 5374 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 5375 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 5376 } 5377 ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5378 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 5379 5380 if (size == 1) { 5381 startsj_s = NULL; 5382 bufa_ptr = NULL; 5383 *B_oth = NULL; 5384 PetscFunctionReturn(0); 5385 } 5386 5387 ctx = a->Mvctx; 5388 tag = ((PetscObject)ctx)->tag; 5389 5390 if (ctx->inuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE," Scatter ctx already in use"); 5391 ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5392 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5393 ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr); 5394 ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr); 5395 ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr); 5396 ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr); 5397 5398 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5399 if (scall == MAT_INITIAL_MATRIX) { 5400 /* i-array */ 5401 /*---------*/ 5402 /* post receives */ 5403 if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */ 5404 for (i=0; i<nrecvs; i++) { 5405 rowlen = rvalues + rstarts[i]*rbs; 5406 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */ 5407 ierr = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5408 } 5409 5410 /* pack the outgoing message */ 5411 ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr); 5412 5413 sstartsj[0] = 0; 5414 rstartsj[0] = 0; 5415 len = 0; /* total length of j or a array to be sent */ 5416 if (nsends) { 5417 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5418 ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr); 5419 } 5420 for (i=0; i<nsends; i++) { 5421 rowlen = svalues + (sstarts[i]-sstarts[0])*sbs; 5422 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5423 for (j=0; j<nrows; j++) { 5424 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5425 for (l=0; l<sbs; l++) { 5426 ierr = 
MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */ 5427 5428 rowlen[j*sbs+l] = ncols; 5429 5430 len += ncols; 5431 ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); 5432 } 5433 k++; 5434 } 5435 ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5436 5437 sstartsj[i+1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5438 } 5439 /* recvs and sends of i-array are completed */ 5440 i = nrecvs; 5441 while (i--) { 5442 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5443 } 5444 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5445 ierr = PetscFree(svalues);CHKERRQ(ierr); 5446 5447 /* allocate buffers for sending j and a arrays */ 5448 ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr); 5449 ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr); 5450 5451 /* create i-array of B_oth */ 5452 ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr); 5453 5454 b_othi[0] = 0; 5455 len = 0; /* total length of j or a array to be received */ 5456 k = 0; 5457 for (i=0; i<nrecvs; i++) { 5458 rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs; 5459 nrows = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */ 5460 for (j=0; j<nrows; j++) { 5461 b_othi[k+1] = b_othi[k] + rowlen[j]; 5462 ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr); 5463 k++; 5464 } 5465 rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5466 } 5467 ierr = PetscFree(rvalues);CHKERRQ(ierr); 5468 5469 /* allocate space for j and a arrrays of B_oth */ 5470 ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr); 5471 ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr); 5472 5473 /* j-array */ 5474 /*---------*/ 5475 /* post receives of j-array */ 5476 for (i=0; i<nrecvs; i++) { 5477 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5478 ierr = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5479 } 5480 5481 /* pack the outgoing message j-array */ 5482 if (nsends) k = sstarts[0]; 5483 for (i=0; i<nsends; i++) { 5484 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5485 bufJ = bufj+sstartsj[i]; 5486 for (j=0; j<nrows; j++) { 5487 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5488 for (ll=0; ll<sbs; ll++) { 5489 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5490 for (l=0; l<ncols; l++) { 5491 *bufJ++ = cols[l]; 5492 } 5493 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr); 5494 } 5495 } 5496 ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5497 } 5498 5499 /* recvs and sends of j-array are completed */ 5500 i = nrecvs; 5501 while (i--) { 5502 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5503 } 5504 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5505 } else if (scall == MAT_REUSE_MATRIX) { 5506 sstartsj = *startsj_s; 5507 rstartsj = *startsj_r; 5508 bufa = *bufa_ptr; 5509 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5510 b_otha = b_oth->a; 5511 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not posses an object container"); 5512 5513 /* a-array */ 5514 /*---------*/ 5515 /* post receives of a-array */ 5516 for (i=0; i<nrecvs; i++) { 5517 nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */ 5518 ierr = 
MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr); 5519 } 5520 5521 /* pack the outgoing message a-array */ 5522 if (nsends) k = sstarts[0]; 5523 for (i=0; i<nsends; i++) { 5524 nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */ 5525 bufA = bufa+sstartsj[i]; 5526 for (j=0; j<nrows; j++) { 5527 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5528 for (ll=0; ll<sbs; ll++) { 5529 ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5530 for (l=0; l<ncols; l++) { 5531 *bufA++ = vals[l]; 5532 } 5533 ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr); 5534 } 5535 } 5536 ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRQ(ierr); 5537 } 5538 /* recvs and sends of a-array are completed */ 5539 i = nrecvs; 5540 while (i--) { 5541 ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRQ(ierr); 5542 } 5543 if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRQ(ierr);} 5544 ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr); 5545 5546 if (scall == MAT_INITIAL_MATRIX) { 5547 /* put together the new matrix */ 5548 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr); 5549 5550 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5551 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5552 b_oth = (Mat_SeqAIJ*)(*B_oth)->data; 5553 b_oth->free_a = PETSC_TRUE; 5554 b_oth->free_ij = PETSC_TRUE; 5555 b_oth->nonew = 0; 5556 5557 ierr = PetscFree(bufj);CHKERRQ(ierr); 5558 if (!startsj_s || !bufa_ptr) { 5559 ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr); 5560 ierr = PetscFree(bufa_ptr);CHKERRQ(ierr); 5561 } else { 5562 *startsj_s = sstartsj; 5563 *startsj_r = rstartsj; 5564 *bufa_ptr = bufa; 5565 } 5566 } 5567 5568 ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr); 5569 ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr); 5570 ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr); 5571 PetscFunctionReturn(0); 5572 } 5573 5574 /*@C 5575 MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication. 5576 5577 Not Collective 5578 5579 Input Parameters: 5580 . A - The matrix in mpiaij format 5581 5582 Output Parameter: 5583 + lvec - The local vector holding off-process values from the argument to a matrix-vector product 5584 . 
colmap - A map from global column index to local index into lvec 5585 - multScatter - A scatter from the argument of a matrix-vector product to lvec 5586 5587 Level: developer 5588 5589 @*/ 5590 #if defined(PETSC_USE_CTABLE) 5591 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter) 5592 #else 5593 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter) 5594 #endif 5595 { 5596 Mat_MPIAIJ *a; 5597 5598 PetscFunctionBegin; 5599 PetscValidHeaderSpecific(A, MAT_CLASSID, 1); 5600 PetscValidPointer(lvec, 2); 5601 PetscValidPointer(colmap, 3); 5602 PetscValidPointer(multScatter, 4); 5603 a = (Mat_MPIAIJ*) A->data; 5604 if (lvec) *lvec = a->lvec; 5605 if (colmap) *colmap = a->colmap; 5606 if (multScatter) *multScatter = a->Mvctx; 5607 PetscFunctionReturn(0); 5608 } 5609 5610 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*); 5611 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*); 5612 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*); 5613 #if defined(PETSC_HAVE_MKL_SPARSE) 5614 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*); 5615 #endif 5616 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*); 5617 #if defined(PETSC_HAVE_ELEMENTAL) 5618 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*); 5619 #endif 5620 #if defined(PETSC_HAVE_HYPRE) 5621 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 5622 PETSC_INTERN PetscErrorCode MatMatMatMult_Transpose_AIJ_AIJ(Mat,Mat,Mat,MatReuse,PetscReal,Mat*); 5623 #endif 5624 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*); 5625 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*); 5626 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*); 5627 5628 /* 5629 Computes (B'*A')' since computing B*A directly is untenable 5630 5631 n p p 5632 ( ) ( ) ( ) 5633 m ( A ) * n ( B ) = m ( C ) 5634 ( ) ( ) ( ) 5635 5636 */ 5637 PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C) 5638 { 5639 PetscErrorCode ierr; 5640 Mat At,Bt,Ct; 5641 5642 PetscFunctionBegin; 5643 ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 5644 ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr); 5645 ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,1.0,&Ct);CHKERRQ(ierr); 5646 ierr = MatDestroy(&At);CHKERRQ(ierr); 5647 ierr = MatDestroy(&Bt);CHKERRQ(ierr); 5648 ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 5649 ierr = MatDestroy(&Ct);CHKERRQ(ierr); 5650 PetscFunctionReturn(0); 5651 } 5652 5653 PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat *C) 5654 { 5655 PetscErrorCode ierr; 5656 PetscInt m=A->rmap->n,n=B->cmap->n; 5657 Mat Cmat; 5658 5659 PetscFunctionBegin; 5660 if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n); 5661 ierr = MatCreate(PetscObjectComm((PetscObject)A),&Cmat);CHKERRQ(ierr); 5662 ierr = MatSetSizes(Cmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 5663 ierr = MatSetBlockSizesFromMats(Cmat,A,B);CHKERRQ(ierr); 5664 ierr = MatSetType(Cmat,MATMPIDENSE);CHKERRQ(ierr); 5665 ierr = MatMPIDenseSetPreallocation(Cmat,NULL);CHKERRQ(ierr); 5666 ierr = MatAssemblyBegin(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5667 ierr = 
MatAssemblyEnd(Cmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5668 5669 Cmat->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 5670 5671 *C = Cmat; 5672 PetscFunctionReturn(0); 5673 } 5674 5675 /* ----------------------------------------------------------------*/ 5676 PETSC_INTERN PetscErrorCode MatMatMult_MPIDense_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscReal fill,Mat *C) 5677 { 5678 PetscErrorCode ierr; 5679 5680 PetscFunctionBegin; 5681 if (scall == MAT_INITIAL_MATRIX) { 5682 ierr = PetscLogEventBegin(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5683 ierr = MatMatMultSymbolic_MPIDense_MPIAIJ(A,B,fill,C);CHKERRQ(ierr); 5684 ierr = PetscLogEventEnd(MAT_MatMultSymbolic,A,B,0,0);CHKERRQ(ierr); 5685 } 5686 ierr = PetscLogEventBegin(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5687 ierr = MatMatMultNumeric_MPIDense_MPIAIJ(A,B,*C);CHKERRQ(ierr); 5688 ierr = PetscLogEventEnd(MAT_MatMultNumeric,A,B,0,0);CHKERRQ(ierr); 5689 PetscFunctionReturn(0); 5690 } 5691 5692 /*MC 5693 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 5694 5695 Options Database Keys: 5696 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions() 5697 5698 Level: beginner 5699 5700 .seealso: MatCreateAIJ() 5701 M*/ 5702 5703 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 5704 { 5705 Mat_MPIAIJ *b; 5706 PetscErrorCode ierr; 5707 PetscMPIInt size; 5708 5709 PetscFunctionBegin; 5710 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 5711 5712 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 5713 B->data = (void*)b; 5714 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 5715 B->assembled = PETSC_FALSE; 5716 B->insertmode = NOT_SET_VALUES; 5717 b->size = size; 5718 5719 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr); 5720 5721 /* build cache for off array entries formed */ 5722 ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr); 5723 5724 b->donotstash = PETSC_FALSE; 5725 b->colmap = 0; 5726 b->garray = 0; 5727 b->roworiented = PETSC_TRUE; 5728 5729 /* stuff used for matrix vector multiply */ 5730 b->lvec = NULL; 5731 b->Mvctx = NULL; 5732 5733 /* stuff for MatGetRow() */ 5734 b->rowindices = 0; 5735 b->rowvalues = 0; 5736 b->getrowactive = PETSC_FALSE; 5737 5738 /* flexible pointer used in CUSP/CUSPARSE classes */ 5739 b->spptr = NULL; 5740 5741 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr); 5742 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr); 5743 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr); 5744 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr); 5745 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr); 5746 ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr); 5747 ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr); 5748 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr); 5749 ierr = 
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MKL_SPARSE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMult_mpidense_mpiaij_C",MatMatMult_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultSymbolic_mpidense_mpiaij_C",MatMatMultSymbolic_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMultNumeric_mpidense_mpiaij_C",MatMatMultNumeric_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMatMatMult_transpose_mpiaij_mpiaij_C",MatMatMatMult_Transpose_AIJ_AIJ);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpiaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates an MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices for the "diagonal" portion of the matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
.  a - matrix values
.  oi - row indices for the "off-diagonal" portion of the matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.
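
       For illustration only (this sketch is not an excerpt from the PETSc examples), a typical call on each
       process, assuming the split CSR arrays i, j, a, oi, oj, and oa have already been built by the application, is
.vb
       Mat mat;

       ierr = MatCreateMPIAIJWithSplitArrays(comm,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,oi,oj,oa,&mat);CHKERRQ(ierr);
       /* ... use mat ... */
       ierr = MatDestroy(&mat);CHKERRQ(ierr);
       /* only after MatDestroy() may i, j, a, oi, oj, and oa be freed */
.ve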

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
{
  PetscErrorCode ierr;
  Mat_MPIAIJ     *maij;

  PetscFunctionBegin;
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
  maij = (Mat_MPIAIJ*) (*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);

  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
  ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);

  ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
#undef CHKERRQ
#define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
#undef SETERRQ2
#define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
#undef SETERRQ3
#define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
#undef SETERRQ
#define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)

#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
PETSC_EXTERN void PETSC_STDCALL matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
{
  Mat            mat  = *mmat;
  PetscInt       m    = *mm, n = *mn;
  InsertMode     addv = *maddv;
  Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
  PetscScalar    value;
  PetscErrorCode ierr;

  MatCheckPreallocated(mat,1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;

#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
#endif
  {
    PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
    PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat        A                 = aij->A;
    Mat_SeqAIJ *a                = (Mat_SeqAIJ*)A->data;
    PetscInt   *aimax            = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
    MatScalar  *aa               = a->a;
    PetscBool  ignorezeroentries = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat        B                 = aij->B;
    Mat_SeqAIJ *b                = (Mat_SeqAIJ*)B->data;
    PetscInt   *bimax            = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
    MatScalar  *ba               = b->a;

    PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
    PetscInt  nonew = a->nonew;
    MatScalar *ap1,*ap2;

    PetscFunctionBegin;
    for (i=0; i<m; i++) {
      if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
      if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
      if (im[i] >= rstart && im[i] < rend) {
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j=0; j<n; j++) {
          if (roworiented) value = v[i*n+j];
          else value = v[i+j*m];
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
            MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
          } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
          /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
          else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
          else {
            if (mat->was_assembled) {
              if (!aij->colmap) {
                ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
              }
#if defined(PETSC_USE_CTABLE)
              ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && row != col) continue;
              if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
                ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
                col  = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private();
                   ba must be refreshed before rp2/ap2 are rebuilt because MatDisAssemble_MPIAIJ()
                   replaced B and its value array */
                B     = aij->B;
                b     = (Mat_SeqAIJ*)B->data;
                bimax = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          }
        }
      } else if (!aij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturnVoid();
}
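
/*
   Illustration only (not part of the library): the wrapper above receives every scalar argument by
   pointer, matching the Fortran calling convention selected by the name-mangling #define block. A
   hypothetical direct call, written here in C for concreteness, would look like

     Mat            mat;                         -- an already preallocated MPIAIJ matrix
     PetscInt       m = 1, n = 1;
     PetscInt       row[1] = {0}, col[1] = {0};
     PetscScalar    val[1] = {1.0};
     InsertMode     mode   = ADD_VALUES;
     PetscErrorCode ierr;

     matsetvaluesmpiaij_(&mat,&m,row,&n,col,val,&mode,&ierr);

   Fortran code reaches the same entry point under the mangled name (matsetvaluesmpiaij,
   matsetvaluesmpiaij_, or MATSETVALUESMPIAIJ, depending on the compiler).
*/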