1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #if defined(PETSC_HAVE_HYPRE) 12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*); 13 #endif 14 15 #undef __FUNCT__ 16 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 17 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 18 { 19 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 20 PetscErrorCode ierr; 21 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 22 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 23 PetscReal shift = 0.0; 24 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 25 26 PetscFunctionBegin; 27 allowzeropivot = PetscNot(A->erroriffailure); 28 29 if (a->idiagvalid) { 30 if (values) *values = a->idiag; 31 PetscFunctionReturn(0); 32 } 33 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 34 diag_offset = a->diag; 35 if (!a->idiag) { 36 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 37 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 38 } 39 diag = a->idiag; 40 mdiag = a->idiag+bs2*mbs; 41 if (values) *values = a->idiag; 42 /* factor and invert each block */ 43 switch (bs) { 44 case 1: 45 for (i=0; i<mbs; i++) { 46 odiag = v + 1*diag_offset[i]; 47 diag[0] = odiag[0]; 48 mdiag[0] = odiag[0]; 49 50 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 51 if (allowzeropivot) { 52 A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 53 A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]); 54 A->factorerror_zeropivot_row = i; 55 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 56 } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON); 57 } 58 59 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 60 diag += 1; 61 mdiag += 1; 62 } 63 break; 64 case 2: 65 for (i=0; i<mbs; i++) { 66 odiag = v + 4*diag_offset[i]; 67 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 68 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 69 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 70 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 71 diag += 4; 72 mdiag += 4; 73 } 74 break; 75 case 3: 76 for (i=0; i<mbs; i++) { 77 odiag = v + 9*diag_offset[i]; 78 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 79 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 80 diag[8] = odiag[8]; 81 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 82 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 83 mdiag[8] = odiag[8]; 84 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 85 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 86 diag += 9; 87 mdiag += 9; 88 } 89 break; 90 case 4: 91 for (i=0; i<mbs; i++) { 92 odiag = v + 16*diag_offset[i]; 93 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 94 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 95 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 96 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 97 diag += 16; 98 mdiag += 16; 99 } 100 break; 101 case 5: 102 for (i=0; i<mbs; i++) { 103 odiag = v + 25*diag_offset[i]; 104 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 105 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 106 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 107 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 108 diag += 25; 109 mdiag += 25; 110 } 111 break; 112 case 6: 113 for (i=0; i<mbs; i++) { 114 odiag = v + 36*diag_offset[i]; 115 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 116 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 117 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 118 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 119 diag += 36; 120 mdiag += 36; 121 } 122 break; 123 case 7: 124 for (i=0; i<mbs; i++) { 125 odiag = v + 49*diag_offset[i]; 126 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 127 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 128 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 129 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 130 diag += 49; 131 mdiag += 49; 132 } 133 break; 134 default: 135 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 136 for (i=0; i<mbs; i++) { 137 odiag = v + bs2*diag_offset[i]; 138 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 139 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 140 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 141 if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 142 diag += bs2; 143 mdiag += bs2; 144 } 145 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 146 } 147 a->idiagvalid = PETSC_TRUE; 148 PetscFunctionReturn(0); 149 } 150 151 #undef __FUNCT__ 152 #define __FUNCT__ "MatSOR_SeqBAIJ" 153 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 154 { 155 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 156 PetscScalar *x,*work,*w,*workt,*t; 157 const MatScalar *v,*aa = a->a, *idiag; 158 const PetscScalar *b,*xb; 159 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 160 PetscErrorCode ierr; 161 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 162 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 163 164 PetscFunctionBegin; 165 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 166 its = its*lits; 167 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 168 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 169 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 170 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 171 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 172 173 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 174 175 if (!m) PetscFunctionReturn(0); 176 diag = a->diag; 177 idiag = a->idiag; 178 k = PetscMax(A->rmap->n,A->cmap->n); 179 if (!a->mult_work) { 180 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 181 } 182 if (!a->sor_workt) { 183 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 184 } 185 if (!a->sor_work) { 186 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 187 } 188 work = a->mult_work; 189 t = a->sor_workt; 190 w = a->sor_work; 191 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 194 195 if (flag & SOR_ZERO_INITIAL_GUESS) { 196 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 197 switch (bs) { 198 case 1: 199 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 200 t[0] = b[0]; 201 i2 = 1; 202 idiag += 1; 203 for (i=1; i<m; i++) { 204 v = aa + ai[i]; 205 vi = aj + ai[i]; 206 nz = diag[i] - ai[i]; 207 s[0] = b[i2]; 208 for (j=0; j<nz; j++) { 209 xw[0] = x[vi[j]]; 210 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 211 } 212 t[i2] = s[0]; 213 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 214 x[i2] = xw[0]; 215 idiag += 1; 216 i2 += 1; 217 } 218 break; 219 case 2: 220 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 221 t[0] = b[0]; t[1] = b[1]; 222 i2 = 2; 223 idiag += 4; 224 for (i=1; i<m; i++) { 225 v = aa + 4*ai[i]; 226 vi = aj + ai[i]; 227 nz = diag[i] - ai[i]; 228 s[0] = b[i2]; s[1] = b[i2+1]; 229 for (j=0; j<nz; j++) { 230 idx = 2*vi[j]; 231 it = 4*j; 232 xw[0] = x[idx]; xw[1] = x[1+idx]; 233 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 234 } 235 t[i2] = s[0]; t[i2+1] = s[1]; 236 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 237 x[i2] = xw[0]; x[i2+1] = xw[1]; 238 idiag += 4; 239 i2 += 2; 240 } 241 break; 242 case 3: 243 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 244 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 245 i2 = 3; 246 idiag += 9; 247 for (i=1; i<m; i++) { 248 v = aa + 9*ai[i]; 249 vi = aj + ai[i]; 250 nz = diag[i] - ai[i]; 251 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 252 while (nz--) { 253 idx = 3*(*vi++); 254 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 255 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 256 v += 9; 257 } 258 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 259 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 260 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 261 idiag += 9; 262 i2 += 3; 263 } 264 break; 265 case 4: 266 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 267 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 268 i2 = 4; 269 idiag += 16; 270 for (i=1; i<m; i++) { 271 v = aa + 16*ai[i]; 272 vi = aj + ai[i]; 273 nz = diag[i] - ai[i]; 274 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 275 while (nz--) { 276 idx = 4*(*vi++); 277 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 278 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 279 v += 16; 280 } 281 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 282 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 283 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 284 idiag += 16; 285 i2 += 4; 286 } 287 break; 288 case 5: 289 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 290 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 291 i2 = 5; 292 idiag += 25; 293 for (i=1; i<m; i++) { 294 v = aa + 25*ai[i]; 295 vi = aj + ai[i]; 296 nz = diag[i] - ai[i]; 297 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 298 while (nz--) { 299 idx = 5*(*vi++); 300 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 301 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 302 v += 25; 303 } 304 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 305 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 306 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 307 idiag += 25; 308 i2 += 5; 309 } 310 break; 311 case 6: 312 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 313 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 314 i2 = 6; 315 idiag += 36; 316 for (i=1; i<m; i++) { 317 v = aa + 36*ai[i]; 318 vi = aj + ai[i]; 319 nz = diag[i] - ai[i]; 320 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 321 while (nz--) { 322 idx = 6*(*vi++); 323 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 324 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 325 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 326 v += 36; 327 } 328 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 329 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 330 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 331 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 332 idiag += 36; 333 i2 += 6; 334 } 335 break; 336 case 7: 337 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 338 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 339 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 340 i2 = 7; 341 idiag += 49; 342 for (i=1; i<m; i++) { 343 v = aa + 49*ai[i]; 344 vi = aj + ai[i]; 345 nz = diag[i] - ai[i]; 346 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 347 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 348 while (nz--) { 349 idx = 7*(*vi++); 350 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 351 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 352 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 353 v += 49; 354 } 355 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 356 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 357 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 358 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 359 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 360 idiag += 49; 361 i2 += 7; 362 } 363 break; 364 default: 365 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 366 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 367 i2 = bs; 368 idiag += bs2; 369 for (i=1; i<m; i++) { 370 v = aa + bs2*ai[i]; 371 vi = aj + ai[i]; 372 nz = diag[i] - ai[i]; 373 374 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 375 /* copy all rows of x that are needed into contiguous space */ 376 workt = work; 377 for (j=0; j<nz; j++) { 378 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 379 workt += bs; 380 } 381 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 382 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 383 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 384 385 idiag += bs2; 386 i2 += bs; 387 } 388 break; 389 } 390 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 391 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 392 xb = t; 393 } 394 else xb = b; 395 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 396 idiag = a->idiag+bs2*(a->mbs-1); 397 i2 = bs * (m-1); 398 switch (bs) { 399 case 1: 400 s[0] = xb[i2]; 401 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 402 x[i2] = xw[0]; 403 i2 -= 1; 404 for (i=m-2; i>=0; i--) { 405 v = aa + (diag[i]+1); 406 vi = aj + diag[i] + 1; 407 nz = ai[i+1] - diag[i] - 1; 408 s[0] = xb[i2]; 409 for (j=0; j<nz; j++) { 410 xw[0] = x[vi[j]]; 411 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 412 } 413 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 414 x[i2] = xw[0]; 415 idiag -= 1; 416 i2 -= 1; 417 } 418 break; 419 case 2: 420 s[0] = xb[i2]; s[1] = xb[i2+1]; 421 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 422 x[i2] = xw[0]; x[i2+1] = xw[1]; 423 i2 -= 2; 424 idiag -= 4; 425 for (i=m-2; i>=0; i--) { 426 v = aa + 4*(diag[i] + 1); 427 vi = aj + diag[i] + 1; 428 nz = ai[i+1] - diag[i] - 1; 429 s[0] = xb[i2]; s[1] = xb[i2+1]; 430 for (j=0; j<nz; j++) { 431 idx = 2*vi[j]; 432 it = 4*j; 433 xw[0] = x[idx]; xw[1] = x[1+idx]; 434 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 435 } 436 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 437 x[i2] = xw[0]; x[i2+1] = xw[1]; 438 idiag -= 4; 439 i2 -= 2; 440 } 441 break; 442 case 3: 443 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 444 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 445 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 446 i2 -= 3; 447 idiag -= 9; 448 for (i=m-2; i>=0; i--) { 449 v = aa + 9*(diag[i]+1); 450 vi = aj + diag[i] + 1; 451 nz = ai[i+1] - diag[i] - 1; 452 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 453 while (nz--) { 454 idx = 3*(*vi++); 455 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 456 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 457 v += 9; 458 } 459 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 460 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 461 idiag -= 9; 462 i2 -= 3; 463 } 464 break; 465 case 4: 466 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 467 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 468 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 469 i2 -= 4; 470 idiag -= 16; 471 for (i=m-2; i>=0; i--) { 472 v = aa + 16*(diag[i]+1); 473 vi = aj + diag[i] + 1; 474 nz = ai[i+1] - diag[i] - 1; 475 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 476 while (nz--) { 477 idx = 4*(*vi++); 478 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 479 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 480 v += 16; 481 } 482 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 483 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 484 idiag -= 16; 485 i2 -= 4; 486 } 487 break; 488 case 5: 489 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 490 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 491 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 492 i2 -= 5; 493 idiag -= 25; 494 for (i=m-2; i>=0; i--) { 495 v = aa + 25*(diag[i]+1); 496 vi = aj + diag[i] + 1; 497 nz = ai[i+1] - diag[i] - 1; 498 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 499 while (nz--) { 500 idx = 5*(*vi++); 501 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 502 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 503 v += 25; 504 } 505 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 506 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 507 idiag -= 25; 508 i2 -= 5; 509 } 510 break; 511 case 6: 512 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 513 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 514 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 515 i2 -= 6; 516 idiag -= 36; 517 for (i=m-2; i>=0; i--) { 518 v = aa + 36*(diag[i]+1); 519 vi = aj + diag[i] + 1; 520 nz = ai[i+1] - diag[i] - 1; 521 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 522 while (nz--) { 523 idx = 6*(*vi++); 524 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 525 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 526 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 527 v += 36; 528 } 529 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 530 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 531 idiag -= 36; 532 i2 -= 6; 533 } 534 break; 535 case 7: 536 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 537 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 538 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 539 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 540 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 541 i2 -= 7; 542 idiag -= 49; 543 for (i=m-2; i>=0; i--) { 544 v = aa + 49*(diag[i]+1); 545 vi = aj + diag[i] + 1; 546 nz = ai[i+1] - diag[i] - 1; 547 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 548 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 549 while (nz--) { 550 idx = 7*(*vi++); 551 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 552 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 553 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 554 v += 49; 555 } 556 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 557 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 558 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 559 idiag -= 49; 560 i2 -= 7; 561 } 562 break; 563 default: 564 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 565 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 566 i2 -= bs; 567 idiag -= bs2; 568 for (i=m-2; i>=0; i--) { 569 v = aa + bs2*(diag[i]+1); 570 vi = aj + diag[i] + 1; 571 nz = ai[i+1] - diag[i] - 1; 572 573 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 574 /* copy all rows of x that are needed into contiguous space */ 575 workt = work; 576 for (j=0; j<nz; j++) { 577 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 578 workt += bs; 579 } 580 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 581 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 582 583 idiag -= bs2; 584 i2 -= bs; 585 } 586 break; 587 } 588 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 589 } 590 its--; 591 } 592 while (its--) { 593 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 594 idiag = a->idiag; 595 i2 = 0; 596 switch (bs) { 597 case 1: 598 for (i=0; i<m; i++) { 599 v = aa + ai[i]; 600 vi = aj + ai[i]; 601 nz = ai[i+1] - ai[i]; 602 s[0] = b[i2]; 603 for (j=0; j<nz; j++) { 604 xw[0] = x[vi[j]]; 605 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 606 } 607 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 608 x[i2] += xw[0]; 609 idiag += 1; 610 i2 += 1; 611 } 612 break; 613 case 2: 614 for (i=0; i<m; i++) { 615 v = aa + 4*ai[i]; 616 vi = aj + ai[i]; 617 nz = ai[i+1] - ai[i]; 618 s[0] = b[i2]; s[1] = b[i2+1]; 619 for (j=0; j<nz; j++) { 620 idx = 2*vi[j]; 621 it = 4*j; 622 xw[0] = x[idx]; xw[1] = x[1+idx]; 623 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 624 } 625 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 626 x[i2] += xw[0]; x[i2+1] += xw[1]; 627 idiag += 4; 628 i2 += 2; 629 } 630 break; 631 case 3: 632 for (i=0; i<m; i++) { 633 v = aa + 9*ai[i]; 634 vi = aj + ai[i]; 635 nz = ai[i+1] - ai[i]; 636 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 637 while (nz--) { 638 idx = 3*(*vi++); 639 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 640 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 641 v += 9; 642 } 643 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 644 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 645 idiag += 9; 646 i2 += 3; 647 } 648 break; 649 case 4: 650 for (i=0; i<m; i++) { 651 v = aa + 16*ai[i]; 652 vi = aj + ai[i]; 653 nz = ai[i+1] - ai[i]; 654 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 655 while (nz--) { 656 idx = 4*(*vi++); 657 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 658 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 659 v += 16; 660 } 661 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 662 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 663 idiag += 16; 664 i2 += 4; 665 } 666 break; 667 case 5: 668 for (i=0; i<m; i++) { 669 v = aa + 25*ai[i]; 670 vi = aj + ai[i]; 671 nz = ai[i+1] - ai[i]; 672 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 673 while (nz--) { 674 idx = 5*(*vi++); 675 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 676 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 677 v += 25; 678 } 679 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 680 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 681 idiag += 25; 682 i2 += 5; 683 } 684 break; 685 case 6: 686 for (i=0; i<m; i++) { 687 v = aa + 36*ai[i]; 688 vi = aj + ai[i]; 689 nz = ai[i+1] - ai[i]; 690 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 691 while (nz--) { 692 idx = 6*(*vi++); 693 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 694 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 695 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 696 v += 36; 697 } 698 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 699 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 700 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 701 idiag += 36; 702 i2 += 6; 703 } 704 break; 705 case 7: 706 for (i=0; i<m; i++) { 707 v = aa + 49*ai[i]; 708 vi = aj + ai[i]; 709 nz = ai[i+1] - ai[i]; 710 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 711 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 712 while (nz--) { 713 idx = 7*(*vi++); 714 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 715 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 716 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 717 v += 49; 718 } 719 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 720 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 721 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 722 idiag += 49; 723 i2 += 7; 724 } 725 break; 726 default: 727 for (i=0; i<m; i++) { 728 v = aa + bs2*ai[i]; 729 vi = aj + ai[i]; 730 nz = ai[i+1] - ai[i]; 731 732 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 733 /* copy all rows of x that are needed into contiguous space */ 734 workt = work; 735 for (j=0; j<nz; j++) { 736 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 737 workt += bs; 738 } 739 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 740 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 741 742 idiag += bs2; 743 i2 += bs; 744 } 745 break; 746 } 747 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 748 } 749 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 750 idiag = a->idiag+bs2*(a->mbs-1); 751 i2 = bs * (m-1); 752 switch (bs) { 753 case 1: 754 for (i=m-1; i>=0; i--) { 755 v = aa + ai[i]; 756 vi = aj + ai[i]; 757 nz = ai[i+1] - ai[i]; 758 s[0] = b[i2]; 759 for (j=0; j<nz; j++) { 760 xw[0] = x[vi[j]]; 761 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 762 } 763 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 764 x[i2] += xw[0]; 765 idiag -= 1; 766 i2 -= 1; 767 } 768 break; 769 case 2: 770 for (i=m-1; i>=0; i--) { 771 v = aa + 4*ai[i]; 772 vi = aj + ai[i]; 773 nz = ai[i+1] - ai[i]; 774 s[0] = b[i2]; s[1] = b[i2+1]; 775 for (j=0; j<nz; j++) { 776 idx = 2*vi[j]; 777 it = 4*j; 778 xw[0] = x[idx]; xw[1] = x[1+idx]; 779 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 780 } 781 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 782 x[i2] += xw[0]; x[i2+1] += xw[1]; 783 idiag -= 4; 784 i2 -= 2; 785 } 786 break; 787 case 3: 788 for (i=m-1; i>=0; i--) { 789 v = aa + 9*ai[i]; 790 vi = aj + ai[i]; 791 nz = ai[i+1] - ai[i]; 792 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 793 while (nz--) { 794 idx = 3*(*vi++); 795 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 796 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 797 v += 9; 798 } 799 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 800 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 801 idiag -= 9; 802 i2 -= 3; 803 } 804 break; 805 case 4: 806 for (i=m-1; i>=0; i--) { 807 v = aa + 16*ai[i]; 808 vi = aj + ai[i]; 809 nz = ai[i+1] - ai[i]; 810 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 811 while (nz--) { 812 idx = 4*(*vi++); 813 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 814 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 815 v += 16; 816 } 817 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 818 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 819 idiag -= 16; 820 i2 -= 4; 821 } 822 break; 823 case 5: 824 for (i=m-1; i>=0; i--) { 825 v = aa + 25*ai[i]; 826 vi = aj + ai[i]; 827 nz = ai[i+1] - ai[i]; 828 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 829 while (nz--) { 830 idx = 5*(*vi++); 831 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 832 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 833 v += 25; 834 } 835 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 836 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 837 idiag -= 25; 838 i2 -= 5; 839 } 840 break; 841 case 6: 842 for (i=m-1; i>=0; i--) { 843 v = aa + 36*ai[i]; 844 vi = aj + ai[i]; 845 nz = ai[i+1] - ai[i]; 846 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 847 while (nz--) { 848 idx = 6*(*vi++); 849 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 850 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 851 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 852 v += 36; 853 } 854 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 855 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 856 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 857 idiag -= 36; 858 i2 -= 6; 859 } 860 break; 861 case 7: 862 for (i=m-1; i>=0; i--) { 863 v = aa + 49*ai[i]; 864 vi = aj + ai[i]; 865 nz = ai[i+1] - ai[i]; 866 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 867 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 868 while (nz--) { 869 idx = 7*(*vi++); 870 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 871 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 872 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 873 v += 49; 874 } 875 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 876 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 877 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 878 idiag -= 49; 879 i2 -= 7; 880 } 881 break; 882 default: 883 for (i=m-1; i>=0; i--) { 884 v = aa + bs2*ai[i]; 885 vi = aj + ai[i]; 886 nz = ai[i+1] - ai[i]; 887 888 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 889 /* copy all rows of x that are needed into contiguous space */ 890 workt = work; 891 for (j=0; j<nz; j++) { 892 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 893 workt += bs; 894 } 895 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 896 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 897 898 idiag -= bs2; 899 i2 -= bs; 900 } 901 break; 902 } 903 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 904 } 905 } 906 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 907 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 908 PetscFunctionReturn(0); 909 } 910 911 912 /* 913 Special version for direct calls from Fortran (Used in PETSc-fun3d) 914 */ 915 #if defined(PETSC_HAVE_FORTRAN_CAPS) 916 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 917 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 918 #define matsetvaluesblocked4_ matsetvaluesblocked4 919 #endif 920 921 #undef __FUNCT__ 922 #define __FUNCT__ "matsetvaluesblocked4_" 923 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 924 { 925 Mat A = *AA; 926 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 927 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 928 PetscInt *ai =a->i,*ailen=a->ilen; 929 PetscInt *aj =a->j,stepval,lastcol = -1; 930 const PetscScalar *value = v; 931 MatScalar *ap,*aa = a->a,*bap; 932 933 PetscFunctionBegin; 934 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 935 stepval = (n-1)*4; 936 for (k=0; k<m; k++) { /* loop over added rows */ 937 row = im[k]; 938 rp = aj + ai[row]; 939 ap = aa + 16*ai[row]; 940 nrow = ailen[row]; 941 low = 0; 942 high = nrow; 943 for (l=0; l<n; l++) { /* loop over added columns */ 944 col = in[l]; 945 if (col <= lastcol) low = 0; 946 else high = nrow; 947 lastcol = col; 948 value = v + k*(stepval+4 + l)*4; 949 while (high-low > 7) { 950 t = (low+high)/2; 951 if (rp[t] > col) high = t; 952 else low = t; 953 } 954 for (i=low; i<high; i++) { 955 if (rp[i] > col) break; 956 if (rp[i] == col) { 957 bap = ap + 16*i; 958 for (ii=0; ii<4; ii++,value+=stepval) { 959 for (jj=ii; jj<16; jj+=4) { 960 bap[jj] += *value++; 961 } 962 } 963 goto noinsert2; 964 } 965 } 966 N = nrow++ - 1; 967 high++; /* added new column index thus must search to one higher than before */ 968 /* shift up all the later entries in this row */ 969 for (ii=N; ii>=i; ii--) { 970 rp[ii+1] = rp[ii]; 971 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 972 } 973 if (N >= i) { 974 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 975 } 976 rp[i] = col; 977 bap = ap + 16*i; 978 for (ii=0; ii<4; ii++,value+=stepval) { 979 for (jj=ii; jj<16; jj+=4) { 980 bap[jj] = *value++; 981 } 982 } 983 noinsert2:; 984 low = i; 985 } 986 ailen[row] = nrow; 987 } 988 PetscFunctionReturnVoid(); 989 } 990 991 #if defined(PETSC_HAVE_FORTRAN_CAPS) 992 #define matsetvalues4_ MATSETVALUES4 993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 994 #define matsetvalues4_ matsetvalues4 995 #endif 996 997 #undef __FUNCT__ 998 #define __FUNCT__ "MatSetValues4_" 999 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 1000 { 1001 Mat A = *AA; 1002 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1003 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 1004 PetscInt *ai=a->i,*ailen=a->ilen; 1005 PetscInt *aj=a->j,brow,bcol; 1006 PetscInt ridx,cidx,lastcol = -1; 1007 MatScalar *ap,value,*aa=a->a,*bap; 1008 1009 PetscFunctionBegin; 1010 for (k=0; k<m; k++) { /* loop over added rows */ 1011 row = im[k]; brow = row/4; 1012 rp = aj + ai[brow]; 1013 ap = aa + 16*ai[brow]; 1014 nrow = ailen[brow]; 1015 low = 0; 1016 high = nrow; 1017 for (l=0; l<n; l++) { /* loop over added columns */ 1018 col = in[l]; bcol = col/4; 1019 ridx = row % 4; cidx = col % 4; 1020 value = v[l + k*n]; 1021 if (col <= lastcol) low = 0; 1022 else high = nrow; 1023 lastcol = col; 1024 while (high-low > 7) { 1025 t = (low+high)/2; 1026 if (rp[t] > bcol) high = t; 1027 else low = t; 1028 } 1029 for (i=low; i<high; i++) { 1030 if (rp[i] > bcol) break; 1031 if (rp[i] == bcol) { 1032 bap = ap + 16*i + 4*cidx + ridx; 1033 *bap += value; 1034 goto noinsert1; 1035 } 1036 } 1037 N = nrow++ - 1; 1038 high++; /* added new column thus must search to one higher than before */ 1039 /* shift up all the later entries in this row */ 1040 for (ii=N; ii>=i; ii--) { 1041 rp[ii+1] = rp[ii]; 1042 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1043 } 1044 if (N>=i) { 1045 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1046 } 1047 rp[i] = bcol; 1048 ap[16*i + 4*cidx + ridx] = value; 1049 noinsert1:; 1050 low = i; 1051 } 1052 ailen[brow] = nrow; 1053 } 1054 PetscFunctionReturnVoid(); 1055 } 1056 1057 /* 1058 Checks for missing diagonals 1059 */ 1060 #undef __FUNCT__ 1061 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1062 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1063 { 1064 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1065 PetscErrorCode ierr; 1066 PetscInt *diag,*ii = a->i,i; 1067 1068 PetscFunctionBegin; 1069 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1070 *missing = PETSC_FALSE; 1071 if (A->rmap->n > 0 && !ii) { 1072 *missing = PETSC_TRUE; 1073 if (d) *d = 0; 1074 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1075 } else { 1076 diag = a->diag; 1077 for (i=0; i<a->mbs; i++) { 1078 if (diag[i] >= ii[i+1]) { 1079 *missing = PETSC_TRUE; 1080 if (d) *d = i; 1081 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1082 break; 1083 } 1084 } 1085 } 1086 PetscFunctionReturn(0); 1087 } 1088 1089 #undef __FUNCT__ 1090 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1091 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1092 { 1093 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1094 PetscErrorCode ierr; 1095 PetscInt i,j,m = a->mbs; 1096 1097 PetscFunctionBegin; 1098 if (!a->diag) { 1099 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1100 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1101 a->free_diag = PETSC_TRUE; 1102 } 1103 for (i=0; i<m; i++) { 1104 a->diag[i] = a->i[i+1]; 1105 for (j=a->i[i]; j<a->i[i+1]; j++) { 1106 if (a->j[j] == i) { 1107 a->diag[i] = j; 1108 break; 1109 } 1110 } 1111 } 1112 PetscFunctionReturn(0); 1113 } 1114 1115 1116 #undef __FUNCT__ 1117 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1118 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1119 { 1120 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1121 PetscErrorCode ierr; 1122 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1123 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1124 1125 PetscFunctionBegin; 1126 *nn = n; 1127 if (!ia) PetscFunctionReturn(0); 1128 if (symmetric) { 1129 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr); 1130 nz = tia[n]; 1131 } else { 1132 tia = a->i; tja = a->j; 1133 } 1134 1135 if (!blockcompressed && bs > 1) { 1136 (*nn) *= bs; 1137 /* malloc & create the natural set of indices */ 1138 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1139 if (n) { 1140 (*ia)[0] = oshift; 1141 for (j=1; j<bs; j++) { 1142 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1143 } 1144 } 1145 1146 for (i=1; i<n; i++) { 1147 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1148 for (j=1; j<bs; j++) { 1149 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1150 } 1151 } 1152 if (n) { 1153 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1154 } 1155 1156 if (inja) { 1157 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1158 cnt = 0; 1159 for (i=0; i<n; i++) { 1160 for (j=0; j<bs; j++) { 1161 for (k=tia[i]; k<tia[i+1]; k++) { 1162 for (l=0; l<bs; l++) { 1163 (*ja)[cnt++] = bs*tja[k] + l; 1164 } 1165 } 1166 } 1167 } 1168 } 1169 1170 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1171 ierr = PetscFree(tia);CHKERRQ(ierr); 1172 ierr = PetscFree(tja);CHKERRQ(ierr); 1173 } 1174 } else if (oshift == 1) { 1175 if (symmetric) { 1176 nz = tia[A->rmap->n/bs]; 1177 /* add 1 to i and j indices */ 1178 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1179 *ia = tia; 1180 if (ja) { 1181 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1182 *ja = tja; 1183 } 1184 } else { 1185 nz = a->i[A->rmap->n/bs]; 1186 /* malloc space and add 1 to i and j indices */ 1187 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1188 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1189 if (ja) { 1190 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1191 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1192 } 1193 } 1194 } else { 1195 *ia = tia; 1196 if (ja) *ja = tja; 1197 } 1198 PetscFunctionReturn(0); 1199 } 1200 1201 #undef __FUNCT__ 1202 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1203 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1204 { 1205 PetscErrorCode ierr; 1206 1207 PetscFunctionBegin; 1208 if (!ia) PetscFunctionReturn(0); 1209 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1210 ierr = PetscFree(*ia);CHKERRQ(ierr); 1211 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1212 } 1213 PetscFunctionReturn(0); 1214 } 1215 1216 #undef __FUNCT__ 1217 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1218 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1219 { 1220 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1221 PetscErrorCode ierr; 1222 1223 PetscFunctionBegin; 1224 #if defined(PETSC_USE_LOG) 1225 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1226 #endif 1227 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1228 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1229 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1230 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1231 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1232 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1233 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1234 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1235 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1236 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1237 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1238 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1239 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1240 1241 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1242 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1243 ierr = PetscFree(A->data);CHKERRQ(ierr); 1244 1245 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1252 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1253 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1254 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1255 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1256 #if defined(PETSC_HAVE_HYPRE) 1257 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr); 1258 #endif 1259 PetscFunctionReturn(0); 1260 } 1261 1262 #undef __FUNCT__ 1263 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1264 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1265 { 1266 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1267 PetscErrorCode ierr; 1268 1269 PetscFunctionBegin; 1270 switch (op) { 1271 case MAT_ROW_ORIENTED: 1272 a->roworiented = flg; 1273 break; 1274 case MAT_KEEP_NONZERO_PATTERN: 1275 a->keepnonzeropattern = flg; 1276 break; 1277 case MAT_NEW_NONZERO_LOCATIONS: 1278 a->nonew = (flg ? 0 : 1); 1279 break; 1280 case MAT_NEW_NONZERO_LOCATION_ERR: 1281 a->nonew = (flg ? -1 : 0); 1282 break; 1283 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1284 a->nonew = (flg ? -2 : 0); 1285 break; 1286 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1287 a->nounused = (flg ? -1 : 0); 1288 break; 1289 case MAT_NEW_DIAGONALS: 1290 case MAT_IGNORE_OFF_PROC_ENTRIES: 1291 case MAT_USE_HASH_TABLE: 1292 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1293 break; 1294 case MAT_SPD: 1295 case MAT_SYMMETRIC: 1296 case MAT_STRUCTURALLY_SYMMETRIC: 1297 case MAT_HERMITIAN: 1298 case MAT_SYMMETRY_ETERNAL: 1299 case MAT_SUBMAT_SINGLEIS: 1300 /* These options are handled directly by MatSetOption() */ 1301 break; 1302 default: 1303 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1304 } 1305 PetscFunctionReturn(0); 1306 } 1307 1308 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1309 #undef __FUNCT__ 1310 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1311 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1312 { 1313 PetscErrorCode ierr; 1314 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1315 MatScalar *aa_i; 1316 PetscScalar *v_i; 1317 1318 PetscFunctionBegin; 1319 bs = A->rmap->bs; 1320 bs2 = bs*bs; 1321 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1322 1323 bn = row/bs; /* Block number */ 1324 bp = row % bs; /* Block Position */ 1325 M = ai[bn+1] - ai[bn]; 1326 *nz = bs*M; 1327 1328 if (v) { 1329 *v = 0; 1330 if (*nz) { 1331 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1332 for (i=0; i<M; i++) { /* for each block in the block row */ 1333 v_i = *v + i*bs; 1334 aa_i = aa + bs2*(ai[bn] + i); 1335 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1336 } 1337 } 1338 } 1339 1340 if (idx) { 1341 *idx = 0; 1342 if (*nz) { 1343 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1344 for (i=0; i<M; i++) { /* for each block in the block row */ 1345 idx_i = *idx + i*bs; 1346 itmp = bs*aj[ai[bn] + i]; 1347 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1348 } 1349 } 1350 } 1351 PetscFunctionReturn(0); 1352 } 1353 1354 #undef __FUNCT__ 1355 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1357 { 1358 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1359 PetscErrorCode ierr; 1360 1361 PetscFunctionBegin; 1362 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1363 PetscFunctionReturn(0); 1364 } 1365 1366 #undef __FUNCT__ 1367 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1368 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1369 { 1370 PetscErrorCode ierr; 1371 1372 PetscFunctionBegin; 1373 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1374 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1375 PetscFunctionReturn(0); 1376 } 1377 1378 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1379 1380 #undef __FUNCT__ 1381 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1382 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1383 { 1384 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1385 Mat C; 1386 PetscErrorCode ierr; 1387 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1388 PetscInt *rows,*cols,bs2=a->bs2; 1389 MatScalar *array; 1390 1391 PetscFunctionBegin; 1392 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1393 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1394 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1395 1396 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1397 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1398 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1399 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1400 ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr); 1401 ierr = PetscFree(col);CHKERRQ(ierr); 1402 } else { 1403 C = *B; 1404 } 1405 1406 array = a->a; 1407 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1408 for (i=0; i<mbs; i++) { 1409 cols[0] = i*bs; 1410 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1411 len = ai[i+1] - ai[i]; 1412 for (j=0; j<len; j++) { 1413 rows[0] = (*aj++)*bs; 1414 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1415 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1416 array += bs2; 1417 } 1418 } 1419 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1420 1421 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1422 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1423 1424 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1425 *B = C; 1426 } else { 1427 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1428 } 1429 PetscFunctionReturn(0); 1430 } 1431 1432 #undef __FUNCT__ 1433 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1434 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1435 { 1436 PetscErrorCode ierr; 1437 Mat Btrans; 1438 1439 PetscFunctionBegin; 1440 *f = PETSC_FALSE; 1441 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1442 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1443 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1444 PetscFunctionReturn(0); 1445 } 1446 1447 #undef __FUNCT__ 1448 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1449 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1450 { 1451 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1452 PetscErrorCode ierr; 1453 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1454 int fd; 1455 PetscScalar *aa; 1456 FILE *file; 1457 1458 PetscFunctionBegin; 1459 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1460 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1461 col_lens[0] = MAT_FILE_CLASSID; 1462 1463 col_lens[1] = A->rmap->N; 1464 col_lens[2] = A->cmap->n; 1465 col_lens[3] = a->nz*bs2; 1466 1467 /* store lengths of each row and write (including header) to file */ 1468 count = 0; 1469 for (i=0; i<a->mbs; i++) { 1470 for (j=0; j<bs; j++) { 1471 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1472 } 1473 } 1474 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1475 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1476 1477 /* store column indices (zero start index) */ 1478 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1479 count = 0; 1480 for (i=0; i<a->mbs; i++) { 1481 for (j=0; j<bs; j++) { 1482 for (k=a->i[i]; k<a->i[i+1]; k++) { 1483 for (l=0; l<bs; l++) { 1484 jj[count++] = bs*a->j[k] + l; 1485 } 1486 } 1487 } 1488 } 1489 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1490 ierr = PetscFree(jj);CHKERRQ(ierr); 1491 1492 /* store nonzero values */ 1493 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1494 count = 0; 1495 for (i=0; i<a->mbs; i++) { 1496 for (j=0; j<bs; j++) { 1497 for (k=a->i[i]; k<a->i[i+1]; k++) { 1498 for (l=0; l<bs; l++) { 1499 aa[count++] = a->a[bs2*k + l*bs + j]; 1500 } 1501 } 1502 } 1503 } 1504 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1505 ierr = PetscFree(aa);CHKERRQ(ierr); 1506 1507 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1508 if (file) { 1509 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1510 } 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #undef __FUNCT__ 1515 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1516 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1517 { 1518 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1519 PetscErrorCode ierr; 1520 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1521 PetscViewerFormat format; 1522 1523 PetscFunctionBegin; 1524 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1525 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1526 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1527 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1528 const char *matname; 1529 Mat aij; 1530 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1531 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1532 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1533 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1534 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1535 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1536 PetscFunctionReturn(0); 1537 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1538 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1539 for (i=0; i<a->mbs; i++) { 1540 for (j=0; j<bs; j++) { 1541 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1542 for (k=a->i[i]; k<a->i[i+1]; k++) { 1543 for (l=0; l<bs; l++) { 1544 #if defined(PETSC_USE_COMPLEX) 1545 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1547 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1548 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1549 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1550 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1551 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1552 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1553 } 1554 #else 1555 if (a->a[bs2*k + l*bs + j] != 0.0) { 1556 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1557 } 1558 #endif 1559 } 1560 } 1561 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1562 } 1563 } 1564 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1565 } else { 1566 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1567 for (i=0; i<a->mbs; i++) { 1568 for (j=0; j<bs; j++) { 1569 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1570 for (k=a->i[i]; k<a->i[i+1]; k++) { 1571 for (l=0; l<bs; l++) { 1572 #if defined(PETSC_USE_COMPLEX) 1573 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1574 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1575 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1576 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1577 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1578 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1579 } else { 1580 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1581 } 1582 #else 1583 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1584 #endif 1585 } 1586 } 1587 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1588 } 1589 } 1590 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1591 } 1592 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1593 PetscFunctionReturn(0); 1594 } 1595 1596 #include <petscdraw.h> 1597 #undef __FUNCT__ 1598 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1599 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1600 { 1601 Mat A = (Mat) Aa; 1602 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1603 PetscErrorCode ierr; 1604 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1605 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1606 MatScalar *aa; 1607 PetscViewer viewer; 1608 PetscViewerFormat format; 1609 1610 PetscFunctionBegin; 1611 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1612 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1613 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1614 1615 /* loop over matrix elements drawing boxes */ 1616 1617 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1618 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1619 /* Blue for negative, Cyan for zero and Red for positive */ 1620 color = PETSC_DRAW_BLUE; 1621 for (i=0,row=0; i<mbs; i++,row+=bs) { 1622 for (j=a->i[i]; j<a->i[i+1]; j++) { 1623 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1624 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1625 aa = a->a + j*bs2; 1626 for (k=0; k<bs; k++) { 1627 for (l=0; l<bs; l++) { 1628 if (PetscRealPart(*aa++) >= 0.) continue; 1629 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1630 } 1631 } 1632 } 1633 } 1634 color = PETSC_DRAW_CYAN; 1635 for (i=0,row=0; i<mbs; i++,row+=bs) { 1636 for (j=a->i[i]; j<a->i[i+1]; j++) { 1637 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1638 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1639 aa = a->a + j*bs2; 1640 for (k=0; k<bs; k++) { 1641 for (l=0; l<bs; l++) { 1642 if (PetscRealPart(*aa++) != 0.) continue; 1643 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1644 } 1645 } 1646 } 1647 } 1648 color = PETSC_DRAW_RED; 1649 for (i=0,row=0; i<mbs; i++,row+=bs) { 1650 for (j=a->i[i]; j<a->i[i+1]; j++) { 1651 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1652 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1653 aa = a->a + j*bs2; 1654 for (k=0; k<bs; k++) { 1655 for (l=0; l<bs; l++) { 1656 if (PetscRealPart(*aa++) <= 0.) continue; 1657 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1658 } 1659 } 1660 } 1661 } 1662 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1663 } else { 1664 /* use contour shading to indicate magnitude of values */ 1665 /* first determine max of all nonzero values */ 1666 PetscReal minv = 0.0, maxv = 0.0; 1667 PetscDraw popup; 1668 1669 for (i=0; i<a->nz*a->bs2; i++) { 1670 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1671 } 1672 if (minv >= maxv) maxv = minv + PETSC_SMALL; 1673 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1674 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1675 1676 ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr); 1677 for (i=0,row=0; i<mbs; i++,row+=bs) { 1678 for (j=a->i[i]; j<a->i[i+1]; j++) { 1679 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1680 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1681 aa = a->a + j*bs2; 1682 for (k=0; k<bs; k++) { 1683 for (l=0; l<bs; l++) { 1684 MatScalar v = *aa++; 1685 color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv); 1686 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1687 } 1688 } 1689 } 1690 } 1691 ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr); 1692 } 1693 PetscFunctionReturn(0); 1694 } 1695 1696 #undef __FUNCT__ 1697 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1698 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1699 { 1700 PetscErrorCode ierr; 1701 PetscReal xl,yl,xr,yr,w,h; 1702 PetscDraw draw; 1703 PetscBool isnull; 1704 1705 PetscFunctionBegin; 1706 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1707 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 1708 if (isnull) PetscFunctionReturn(0); 1709 1710 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1711 xr += w; yr += h; xl = -w; yl = -h; 1712 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1713 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1714 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1715 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1716 ierr = PetscDrawSave(draw);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatView_SeqBAIJ" 1722 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1723 { 1724 PetscErrorCode ierr; 1725 PetscBool iascii,isbinary,isdraw; 1726 1727 PetscFunctionBegin; 1728 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1729 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1730 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1731 if (iascii) { 1732 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1733 } else if (isbinary) { 1734 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1735 } else if (isdraw) { 1736 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1737 } else { 1738 Mat B; 1739 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1740 ierr = MatView(B,viewer);CHKERRQ(ierr); 1741 ierr = MatDestroy(&B);CHKERRQ(ierr); 1742 } 1743 PetscFunctionReturn(0); 1744 } 1745 1746 1747 #undef __FUNCT__ 1748 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1749 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1750 { 1751 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1752 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1753 PetscInt *ai = a->i,*ailen = a->ilen; 1754 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1755 MatScalar *ap,*aa = a->a; 1756 1757 PetscFunctionBegin; 1758 for (k=0; k<m; k++) { /* loop over rows */ 1759 row = im[k]; brow = row/bs; 1760 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1761 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1762 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1763 nrow = ailen[brow]; 1764 for (l=0; l<n; l++) { /* loop over columns */ 1765 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1766 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1767 col = in[l]; 1768 bcol = col/bs; 1769 cidx = col%bs; 1770 ridx = row%bs; 1771 high = nrow; 1772 low = 0; /* assume unsorted */ 1773 while (high-low > 5) { 1774 t = (low+high)/2; 1775 if (rp[t] > bcol) high = t; 1776 else low = t; 1777 } 1778 for (i=low; i<high; i++) { 1779 if (rp[i] > bcol) break; 1780 if (rp[i] == bcol) { 1781 *v++ = ap[bs2*i+bs*cidx+ridx]; 1782 goto finished; 1783 } 1784 } 1785 *v++ = 0.0; 1786 finished:; 1787 } 1788 } 1789 PetscFunctionReturn(0); 1790 } 1791 1792 #undef __FUNCT__ 1793 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1794 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1795 { 1796 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1797 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1798 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1799 PetscErrorCode ierr; 1800 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1801 PetscBool roworiented=a->roworiented; 1802 const PetscScalar *value = v; 1803 MatScalar *ap,*aa = a->a,*bap; 1804 1805 PetscFunctionBegin; 1806 if (roworiented) { 1807 stepval = (n-1)*bs; 1808 } else { 1809 stepval = (m-1)*bs; 1810 } 1811 for (k=0; k<m; k++) { /* loop over added rows */ 1812 row = im[k]; 1813 if (row < 0) continue; 1814 #if defined(PETSC_USE_DEBUG) 1815 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1816 #endif 1817 rp = aj + ai[row]; 1818 ap = aa + bs2*ai[row]; 1819 rmax = imax[row]; 1820 nrow = ailen[row]; 1821 low = 0; 1822 high = nrow; 1823 for (l=0; l<n; l++) { /* loop over added columns */ 1824 if (in[l] < 0) continue; 1825 #if defined(PETSC_USE_DEBUG) 1826 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1827 #endif 1828 col = in[l]; 1829 if (roworiented) { 1830 value = v + (k*(stepval+bs) + l)*bs; 1831 } else { 1832 value = v + (l*(stepval+bs) + k)*bs; 1833 } 1834 if (col <= lastcol) low = 0; 1835 else high = nrow; 1836 lastcol = col; 1837 while (high-low > 7) { 1838 t = (low+high)/2; 1839 if (rp[t] > col) high = t; 1840 else low = t; 1841 } 1842 for (i=low; i<high; i++) { 1843 if (rp[i] > col) break; 1844 if (rp[i] == col) { 1845 bap = ap + bs2*i; 1846 if (roworiented) { 1847 if (is == ADD_VALUES) { 1848 for (ii=0; ii<bs; ii++,value+=stepval) { 1849 for (jj=ii; jj<bs2; jj+=bs) { 1850 bap[jj] += *value++; 1851 } 1852 } 1853 } else { 1854 for (ii=0; ii<bs; ii++,value+=stepval) { 1855 for (jj=ii; jj<bs2; jj+=bs) { 1856 bap[jj] = *value++; 1857 } 1858 } 1859 } 1860 } else { 1861 if (is == ADD_VALUES) { 1862 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1863 for (jj=0; jj<bs; jj++) { 1864 bap[jj] += value[jj]; 1865 } 1866 bap += bs; 1867 } 1868 } else { 1869 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1870 for (jj=0; jj<bs; jj++) { 1871 bap[jj] = value[jj]; 1872 } 1873 bap += bs; 1874 } 1875 } 1876 } 1877 goto noinsert2; 1878 } 1879 } 1880 if (nonew == 1) goto noinsert2; 1881 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1882 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1883 N = nrow++ - 1; high++; 1884 /* shift up all the later entries in this row */ 1885 for (ii=N; ii>=i; ii--) { 1886 rp[ii+1] = rp[ii]; 1887 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1888 } 1889 if (N >= i) { 1890 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1891 } 1892 rp[i] = col; 1893 bap = ap + bs2*i; 1894 if (roworiented) { 1895 for (ii=0; ii<bs; ii++,value+=stepval) { 1896 for (jj=ii; jj<bs2; jj+=bs) { 1897 bap[jj] = *value++; 1898 } 1899 } 1900 } else { 1901 for (ii=0; ii<bs; ii++,value+=stepval) { 1902 for (jj=0; jj<bs; jj++) { 1903 *bap++ = *value++; 1904 } 1905 } 1906 } 1907 noinsert2:; 1908 low = i; 1909 } 1910 ailen[row] = nrow; 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 #undef __FUNCT__ 1916 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1917 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1918 { 1919 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1920 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1921 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1922 PetscErrorCode ierr; 1923 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1924 MatScalar *aa = a->a,*ap; 1925 PetscReal ratio=0.6; 1926 1927 PetscFunctionBegin; 1928 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1929 1930 if (m) rmax = ailen[0]; 1931 for (i=1; i<mbs; i++) { 1932 /* move each row back by the amount of empty slots (fshift) before it*/ 1933 fshift += imax[i-1] - ailen[i-1]; 1934 rmax = PetscMax(rmax,ailen[i]); 1935 if (fshift) { 1936 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1937 N = ailen[i]; 1938 for (j=0; j<N; j++) { 1939 ip[j-fshift] = ip[j]; 1940 1941 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1942 } 1943 } 1944 ai[i] = ai[i-1] + ailen[i-1]; 1945 } 1946 if (mbs) { 1947 fshift += imax[mbs-1] - ailen[mbs-1]; 1948 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1949 } 1950 1951 /* reset ilen and imax for each row */ 1952 a->nonzerorowcnt = 0; 1953 for (i=0; i<mbs; i++) { 1954 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1955 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1956 } 1957 a->nz = ai[mbs]; 1958 1959 /* diagonals may have moved, so kill the diagonal pointers */ 1960 a->idiagvalid = PETSC_FALSE; 1961 if (fshift && a->diag) { 1962 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1963 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1964 a->diag = 0; 1965 } 1966 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1967 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1968 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1969 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1970 1971 A->info.mallocs += a->reallocs; 1972 a->reallocs = 0; 1973 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1974 a->rmax = rmax; 1975 1976 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1977 PetscFunctionReturn(0); 1978 } 1979 1980 /* 1981 This function returns an array of flags which indicate the locations of contiguous 1982 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1983 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1984 Assume: sizes should be long enough to hold all the values. 1985 */ 1986 #undef __FUNCT__ 1987 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1988 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1989 { 1990 PetscInt i,j,k,row; 1991 PetscBool flg; 1992 1993 PetscFunctionBegin; 1994 for (i=0,j=0; i<n; j++) { 1995 row = idx[i]; 1996 if (row%bs!=0) { /* Not the begining of a block */ 1997 sizes[j] = 1; 1998 i++; 1999 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 2000 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 2001 i++; 2002 } else { /* Begining of the block, so check if the complete block exists */ 2003 flg = PETSC_TRUE; 2004 for (k=1; k<bs; k++) { 2005 if (row+k != idx[i+k]) { /* break in the block */ 2006 flg = PETSC_FALSE; 2007 break; 2008 } 2009 } 2010 if (flg) { /* No break in the bs */ 2011 sizes[j] = bs; 2012 i += bs; 2013 } else { 2014 sizes[j] = 1; 2015 i++; 2016 } 2017 } 2018 } 2019 *bs_max = j; 2020 PetscFunctionReturn(0); 2021 } 2022 2023 #undef __FUNCT__ 2024 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2025 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2026 { 2027 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2028 PetscErrorCode ierr; 2029 PetscInt i,j,k,count,*rows; 2030 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2031 PetscScalar zero = 0.0; 2032 MatScalar *aa; 2033 const PetscScalar *xx; 2034 PetscScalar *bb; 2035 2036 PetscFunctionBegin; 2037 /* fix right hand side if needed */ 2038 if (x && b) { 2039 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2040 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2041 for (i=0; i<is_n; i++) { 2042 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2043 } 2044 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2045 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2046 } 2047 2048 /* Make a copy of the IS and sort it */ 2049 /* allocate memory for rows,sizes */ 2050 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2051 2052 /* copy IS values to rows, and sort them */ 2053 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2054 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2055 2056 if (baij->keepnonzeropattern) { 2057 for (i=0; i<is_n; i++) sizes[i] = 1; 2058 bs_max = is_n; 2059 } else { 2060 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2061 A->nonzerostate++; 2062 } 2063 2064 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2065 row = rows[j]; 2066 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2067 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2068 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2069 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2070 if (diag != (PetscScalar)0.0) { 2071 if (baij->ilen[row/bs] > 0) { 2072 baij->ilen[row/bs] = 1; 2073 baij->j[baij->i[row/bs]] = row/bs; 2074 2075 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2076 } 2077 /* Now insert all the diagonal values for this bs */ 2078 for (k=0; k<bs; k++) { 2079 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2080 } 2081 } else { /* (diag == 0.0) */ 2082 baij->ilen[row/bs] = 0; 2083 } /* end (diag == 0.0) */ 2084 } else { /* (sizes[i] != bs) */ 2085 #if defined(PETSC_USE_DEBUG) 2086 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2087 #endif 2088 for (k=0; k<count; k++) { 2089 aa[0] = zero; 2090 aa += bs; 2091 } 2092 if (diag != (PetscScalar)0.0) { 2093 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2094 } 2095 } 2096 } 2097 2098 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2099 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2100 PetscFunctionReturn(0); 2101 } 2102 2103 #undef __FUNCT__ 2104 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2105 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2106 { 2107 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2108 PetscErrorCode ierr; 2109 PetscInt i,j,k,count; 2110 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2111 PetscScalar zero = 0.0; 2112 MatScalar *aa; 2113 const PetscScalar *xx; 2114 PetscScalar *bb; 2115 PetscBool *zeroed,vecs = PETSC_FALSE; 2116 2117 PetscFunctionBegin; 2118 /* fix right hand side if needed */ 2119 if (x && b) { 2120 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2121 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2122 vecs = PETSC_TRUE; 2123 } 2124 2125 /* zero the columns */ 2126 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2127 for (i=0; i<is_n; i++) { 2128 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2129 zeroed[is_idx[i]] = PETSC_TRUE; 2130 } 2131 for (i=0; i<A->rmap->N; i++) { 2132 if (!zeroed[i]) { 2133 row = i/bs; 2134 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2135 for (k=0; k<bs; k++) { 2136 col = bs*baij->j[j] + k; 2137 if (zeroed[col]) { 2138 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2139 if (vecs) bb[i] -= aa[0]*xx[col]; 2140 aa[0] = 0.0; 2141 } 2142 } 2143 } 2144 } else if (vecs) bb[i] = diag*xx[i]; 2145 } 2146 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2147 if (vecs) { 2148 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2149 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2150 } 2151 2152 /* zero the rows */ 2153 for (i=0; i<is_n; i++) { 2154 row = is_idx[i]; 2155 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2156 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2157 for (k=0; k<count; k++) { 2158 aa[0] = zero; 2159 aa += bs; 2160 } 2161 if (diag != (PetscScalar)0.0) { 2162 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2163 } 2164 } 2165 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2166 PetscFunctionReturn(0); 2167 } 2168 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2171 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2172 { 2173 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2174 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2175 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2176 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2177 PetscErrorCode ierr; 2178 PetscInt ridx,cidx,bs2=a->bs2; 2179 PetscBool roworiented=a->roworiented; 2180 MatScalar *ap,value,*aa=a->a,*bap; 2181 2182 PetscFunctionBegin; 2183 for (k=0; k<m; k++) { /* loop over added rows */ 2184 row = im[k]; 2185 brow = row/bs; 2186 if (row < 0) continue; 2187 #if defined(PETSC_USE_DEBUG) 2188 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2189 #endif 2190 rp = aj + ai[brow]; 2191 ap = aa + bs2*ai[brow]; 2192 rmax = imax[brow]; 2193 nrow = ailen[brow]; 2194 low = 0; 2195 high = nrow; 2196 for (l=0; l<n; l++) { /* loop over added columns */ 2197 if (in[l] < 0) continue; 2198 #if defined(PETSC_USE_DEBUG) 2199 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2200 #endif 2201 col = in[l]; bcol = col/bs; 2202 ridx = row % bs; cidx = col % bs; 2203 if (roworiented) { 2204 value = v[l + k*n]; 2205 } else { 2206 value = v[k + l*m]; 2207 } 2208 if (col <= lastcol) low = 0; else high = nrow; 2209 lastcol = col; 2210 while (high-low > 7) { 2211 t = (low+high)/2; 2212 if (rp[t] > bcol) high = t; 2213 else low = t; 2214 } 2215 for (i=low; i<high; i++) { 2216 if (rp[i] > bcol) break; 2217 if (rp[i] == bcol) { 2218 bap = ap + bs2*i + bs*cidx + ridx; 2219 if (is == ADD_VALUES) *bap += value; 2220 else *bap = value; 2221 goto noinsert1; 2222 } 2223 } 2224 if (nonew == 1) goto noinsert1; 2225 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2226 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2227 N = nrow++ - 1; high++; 2228 /* shift up all the later entries in this row */ 2229 for (ii=N; ii>=i; ii--) { 2230 rp[ii+1] = rp[ii]; 2231 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2232 } 2233 if (N>=i) { 2234 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2235 } 2236 rp[i] = bcol; 2237 ap[bs2*i + bs*cidx + ridx] = value; 2238 a->nz++; 2239 A->nonzerostate++; 2240 noinsert1:; 2241 low = i; 2242 } 2243 ailen[brow] = nrow; 2244 } 2245 PetscFunctionReturn(0); 2246 } 2247 2248 #undef __FUNCT__ 2249 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2250 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2251 { 2252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2253 Mat outA; 2254 PetscErrorCode ierr; 2255 PetscBool row_identity,col_identity; 2256 2257 PetscFunctionBegin; 2258 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2259 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2260 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2261 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2262 2263 outA = inA; 2264 inA->factortype = MAT_FACTOR_LU; 2265 ierr = PetscFree(inA->solvertype);CHKERRQ(ierr); 2266 ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr); 2267 2268 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2269 2270 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2271 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2272 a->row = row; 2273 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2274 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2275 a->col = col; 2276 2277 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2278 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2279 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2280 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2281 2282 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2283 if (!a->solve_work) { 2284 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2285 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2286 } 2287 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2288 PetscFunctionReturn(0); 2289 } 2290 2291 #undef __FUNCT__ 2292 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2293 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2294 { 2295 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2296 PetscInt i,nz,mbs; 2297 2298 PetscFunctionBegin; 2299 nz = baij->maxnz; 2300 mbs = baij->mbs; 2301 for (i=0; i<nz; i++) { 2302 baij->j[i] = indices[i]; 2303 } 2304 baij->nz = nz; 2305 for (i=0; i<mbs; i++) { 2306 baij->ilen[i] = baij->imax[i]; 2307 } 2308 PetscFunctionReturn(0); 2309 } 2310 2311 #undef __FUNCT__ 2312 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2313 /*@ 2314 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2315 in the matrix. 2316 2317 Input Parameters: 2318 + mat - the SeqBAIJ matrix 2319 - indices - the column indices 2320 2321 Level: advanced 2322 2323 Notes: 2324 This can be called if you have precomputed the nonzero structure of the 2325 matrix and want to provide it to the matrix object to improve the performance 2326 of the MatSetValues() operation. 2327 2328 You MUST have set the correct numbers of nonzeros per row in the call to 2329 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2330 2331 MUST be called before any calls to MatSetValues(); 2332 2333 @*/ 2334 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2335 { 2336 PetscErrorCode ierr; 2337 2338 PetscFunctionBegin; 2339 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2340 PetscValidPointer(indices,2); 2341 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2342 PetscFunctionReturn(0); 2343 } 2344 2345 #undef __FUNCT__ 2346 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2347 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2348 { 2349 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2350 PetscErrorCode ierr; 2351 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2352 PetscReal atmp; 2353 PetscScalar *x,zero = 0.0; 2354 MatScalar *aa; 2355 PetscInt ncols,brow,krow,kcol; 2356 2357 PetscFunctionBegin; 2358 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2359 bs = A->rmap->bs; 2360 aa = a->a; 2361 ai = a->i; 2362 aj = a->j; 2363 mbs = a->mbs; 2364 2365 ierr = VecSet(v,zero);CHKERRQ(ierr); 2366 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2367 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2368 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2369 for (i=0; i<mbs; i++) { 2370 ncols = ai[1] - ai[0]; ai++; 2371 brow = bs*i; 2372 for (j=0; j<ncols; j++) { 2373 for (kcol=0; kcol<bs; kcol++) { 2374 for (krow=0; krow<bs; krow++) { 2375 atmp = PetscAbsScalar(*aa);aa++; 2376 row = brow + krow; /* row index */ 2377 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2378 } 2379 } 2380 aj++; 2381 } 2382 } 2383 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2384 PetscFunctionReturn(0); 2385 } 2386 2387 #undef __FUNCT__ 2388 #define __FUNCT__ "MatCopy_SeqBAIJ" 2389 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2390 { 2391 PetscErrorCode ierr; 2392 2393 PetscFunctionBegin; 2394 /* If the two matrices have the same copy implementation, use fast copy. */ 2395 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2396 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2397 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2398 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2399 2400 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2401 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2402 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2403 } else { 2404 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2405 } 2406 PetscFunctionReturn(0); 2407 } 2408 2409 #undef __FUNCT__ 2410 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2411 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2412 { 2413 PetscErrorCode ierr; 2414 2415 PetscFunctionBegin; 2416 ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2417 PetscFunctionReturn(0); 2418 } 2419 2420 #undef __FUNCT__ 2421 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2422 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2423 { 2424 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2425 2426 PetscFunctionBegin; 2427 *array = a->a; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 #undef __FUNCT__ 2432 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2433 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2434 { 2435 PetscFunctionBegin; 2436 PetscFunctionReturn(0); 2437 } 2438 2439 #undef __FUNCT__ 2440 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2441 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2442 { 2443 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2444 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2445 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 /* Set the number of nonzeros in the new matrix */ 2450 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2451 PetscFunctionReturn(0); 2452 } 2453 2454 #undef __FUNCT__ 2455 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2456 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2457 { 2458 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2459 PetscErrorCode ierr; 2460 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2461 PetscBLASInt one=1; 2462 2463 PetscFunctionBegin; 2464 if (str == SAME_NONZERO_PATTERN) { 2465 PetscScalar alpha = a; 2466 PetscBLASInt bnz; 2467 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2468 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2469 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2470 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2471 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2472 } else { 2473 Mat B; 2474 PetscInt *nnz; 2475 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2476 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2477 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2478 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2479 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2480 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2481 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2482 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2483 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2484 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2485 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2486 ierr = PetscFree(nnz);CHKERRQ(ierr); 2487 } 2488 PetscFunctionReturn(0); 2489 } 2490 2491 #undef __FUNCT__ 2492 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2493 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2494 { 2495 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2496 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2497 MatScalar *aa = a->a; 2498 2499 PetscFunctionBegin; 2500 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 #undef __FUNCT__ 2505 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2506 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2507 { 2508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2509 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2510 MatScalar *aa = a->a; 2511 2512 PetscFunctionBegin; 2513 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 #undef __FUNCT__ 2518 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2519 /* 2520 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2521 */ 2522 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2523 { 2524 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2525 PetscErrorCode ierr; 2526 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2527 PetscInt nz = a->i[m],row,*jj,mr,col; 2528 2529 PetscFunctionBegin; 2530 *nn = n; 2531 if (!ia) PetscFunctionReturn(0); 2532 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2533 else { 2534 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2535 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2536 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2537 jj = a->j; 2538 for (i=0; i<nz; i++) { 2539 collengths[jj[i]]++; 2540 } 2541 cia[0] = oshift; 2542 for (i=0; i<n; i++) { 2543 cia[i+1] = cia[i] + collengths[i]; 2544 } 2545 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2546 jj = a->j; 2547 for (row=0; row<m; row++) { 2548 mr = a->i[row+1] - a->i[row]; 2549 for (i=0; i<mr; i++) { 2550 col = *jj++; 2551 2552 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2553 } 2554 } 2555 ierr = PetscFree(collengths);CHKERRQ(ierr); 2556 *ia = cia; *ja = cja; 2557 } 2558 PetscFunctionReturn(0); 2559 } 2560 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2563 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2564 { 2565 PetscErrorCode ierr; 2566 2567 PetscFunctionBegin; 2568 if (!ia) PetscFunctionReturn(0); 2569 ierr = PetscFree(*ia);CHKERRQ(ierr); 2570 ierr = PetscFree(*ja);CHKERRQ(ierr); 2571 PetscFunctionReturn(0); 2572 } 2573 2574 /* 2575 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2576 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2577 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2578 */ 2579 #undef __FUNCT__ 2580 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2581 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2582 { 2583 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2584 PetscErrorCode ierr; 2585 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2586 PetscInt nz = a->i[m],row,*jj,mr,col; 2587 PetscInt *cspidx; 2588 2589 PetscFunctionBegin; 2590 *nn = n; 2591 if (!ia) PetscFunctionReturn(0); 2592 2593 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2594 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2595 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2596 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2597 jj = a->j; 2598 for (i=0; i<nz; i++) { 2599 collengths[jj[i]]++; 2600 } 2601 cia[0] = oshift; 2602 for (i=0; i<n; i++) { 2603 cia[i+1] = cia[i] + collengths[i]; 2604 } 2605 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2606 jj = a->j; 2607 for (row=0; row<m; row++) { 2608 mr = a->i[row+1] - a->i[row]; 2609 for (i=0; i<mr; i++) { 2610 col = *jj++; 2611 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2612 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2613 } 2614 } 2615 ierr = PetscFree(collengths);CHKERRQ(ierr); 2616 *ia = cia; *ja = cja; 2617 *spidx = cspidx; 2618 PetscFunctionReturn(0); 2619 } 2620 2621 #undef __FUNCT__ 2622 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2623 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2624 { 2625 PetscErrorCode ierr; 2626 2627 PetscFunctionBegin; 2628 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2629 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2630 PetscFunctionReturn(0); 2631 } 2632 2633 #undef __FUNCT__ 2634 #define __FUNCT__ "MatShift_SeqBAIJ" 2635 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2636 { 2637 PetscErrorCode ierr; 2638 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2639 2640 PetscFunctionBegin; 2641 if (!Y->preallocated || !aij->nz) { 2642 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2643 } 2644 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2645 PetscFunctionReturn(0); 2646 } 2647 2648 /* -------------------------------------------------------------------*/ 2649 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2650 MatGetRow_SeqBAIJ, 2651 MatRestoreRow_SeqBAIJ, 2652 MatMult_SeqBAIJ_N, 2653 /* 4*/ MatMultAdd_SeqBAIJ_N, 2654 MatMultTranspose_SeqBAIJ, 2655 MatMultTransposeAdd_SeqBAIJ, 2656 0, 2657 0, 2658 0, 2659 /* 10*/ 0, 2660 MatLUFactor_SeqBAIJ, 2661 0, 2662 0, 2663 MatTranspose_SeqBAIJ, 2664 /* 15*/ MatGetInfo_SeqBAIJ, 2665 MatEqual_SeqBAIJ, 2666 MatGetDiagonal_SeqBAIJ, 2667 MatDiagonalScale_SeqBAIJ, 2668 MatNorm_SeqBAIJ, 2669 /* 20*/ 0, 2670 MatAssemblyEnd_SeqBAIJ, 2671 MatSetOption_SeqBAIJ, 2672 MatZeroEntries_SeqBAIJ, 2673 /* 24*/ MatZeroRows_SeqBAIJ, 2674 0, 2675 0, 2676 0, 2677 0, 2678 /* 29*/ MatSetUp_SeqBAIJ, 2679 0, 2680 0, 2681 0, 2682 0, 2683 /* 34*/ MatDuplicate_SeqBAIJ, 2684 0, 2685 0, 2686 MatILUFactor_SeqBAIJ, 2687 0, 2688 /* 39*/ MatAXPY_SeqBAIJ, 2689 MatGetSubMatrices_SeqBAIJ, 2690 MatIncreaseOverlap_SeqBAIJ, 2691 MatGetValues_SeqBAIJ, 2692 MatCopy_SeqBAIJ, 2693 /* 44*/ 0, 2694 MatScale_SeqBAIJ, 2695 MatShift_SeqBAIJ, 2696 0, 2697 MatZeroRowsColumns_SeqBAIJ, 2698 /* 49*/ 0, 2699 MatGetRowIJ_SeqBAIJ, 2700 MatRestoreRowIJ_SeqBAIJ, 2701 MatGetColumnIJ_SeqBAIJ, 2702 MatRestoreColumnIJ_SeqBAIJ, 2703 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2704 0, 2705 0, 2706 0, 2707 MatSetValuesBlocked_SeqBAIJ, 2708 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2709 MatDestroy_SeqBAIJ, 2710 MatView_SeqBAIJ, 2711 0, 2712 0, 2713 /* 64*/ 0, 2714 0, 2715 0, 2716 0, 2717 0, 2718 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2719 0, 2720 MatConvert_Basic, 2721 0, 2722 0, 2723 /* 74*/ 0, 2724 MatFDColoringApply_BAIJ, 2725 0, 2726 0, 2727 0, 2728 /* 79*/ 0, 2729 0, 2730 0, 2731 0, 2732 MatLoad_SeqBAIJ, 2733 /* 84*/ 0, 2734 0, 2735 0, 2736 0, 2737 0, 2738 /* 89*/ 0, 2739 0, 2740 0, 2741 0, 2742 0, 2743 /* 94*/ 0, 2744 0, 2745 0, 2746 0, 2747 0, 2748 /* 99*/ 0, 2749 0, 2750 0, 2751 0, 2752 0, 2753 /*104*/ 0, 2754 MatRealPart_SeqBAIJ, 2755 MatImaginaryPart_SeqBAIJ, 2756 0, 2757 0, 2758 /*109*/ 0, 2759 0, 2760 0, 2761 0, 2762 MatMissingDiagonal_SeqBAIJ, 2763 /*114*/ 0, 2764 0, 2765 0, 2766 0, 2767 0, 2768 /*119*/ 0, 2769 0, 2770 MatMultHermitianTranspose_SeqBAIJ, 2771 MatMultHermitianTransposeAdd_SeqBAIJ, 2772 0, 2773 /*124*/ 0, 2774 0, 2775 MatInvertBlockDiagonal_SeqBAIJ, 2776 0, 2777 0, 2778 /*129*/ 0, 2779 0, 2780 0, 2781 0, 2782 0, 2783 /*134*/ 0, 2784 0, 2785 0, 2786 0, 2787 0, 2788 /*139*/ 0, 2789 0, 2790 0, 2791 MatFDColoringSetUp_SeqXAIJ, 2792 0, 2793 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2794 }; 2795 2796 #undef __FUNCT__ 2797 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2798 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2799 { 2800 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2801 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2802 PetscErrorCode ierr; 2803 2804 PetscFunctionBegin; 2805 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2806 2807 /* allocate space for values if not already there */ 2808 if (!aij->saved_values) { 2809 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2810 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2811 } 2812 2813 /* copy values over */ 2814 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2815 PetscFunctionReturn(0); 2816 } 2817 2818 #undef __FUNCT__ 2819 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2820 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2821 { 2822 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2823 PetscErrorCode ierr; 2824 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2825 2826 PetscFunctionBegin; 2827 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2828 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2829 2830 /* copy values over */ 2831 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2836 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2837 2838 #undef __FUNCT__ 2839 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2840 static PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2841 { 2842 Mat_SeqBAIJ *b; 2843 PetscErrorCode ierr; 2844 PetscInt i,mbs,nbs,bs2; 2845 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2846 2847 PetscFunctionBegin; 2848 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2849 if (nz == MAT_SKIP_ALLOCATION) { 2850 skipallocation = PETSC_TRUE; 2851 nz = 0; 2852 } 2853 2854 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2855 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2856 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2857 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2858 2859 B->preallocated = PETSC_TRUE; 2860 2861 mbs = B->rmap->n/bs; 2862 nbs = B->cmap->n/bs; 2863 bs2 = bs*bs; 2864 2865 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2866 2867 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2868 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2869 if (nnz) { 2870 for (i=0; i<mbs; i++) { 2871 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2872 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2873 } 2874 } 2875 2876 b = (Mat_SeqBAIJ*)B->data; 2877 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2878 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2879 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2880 2881 if (!flg) { 2882 switch (bs) { 2883 case 1: 2884 B->ops->mult = MatMult_SeqBAIJ_1; 2885 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2886 break; 2887 case 2: 2888 B->ops->mult = MatMult_SeqBAIJ_2; 2889 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2890 break; 2891 case 3: 2892 B->ops->mult = MatMult_SeqBAIJ_3; 2893 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2894 break; 2895 case 4: 2896 B->ops->mult = MatMult_SeqBAIJ_4; 2897 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2898 break; 2899 case 5: 2900 B->ops->mult = MatMult_SeqBAIJ_5; 2901 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2902 break; 2903 case 6: 2904 B->ops->mult = MatMult_SeqBAIJ_6; 2905 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2906 break; 2907 case 7: 2908 B->ops->mult = MatMult_SeqBAIJ_7; 2909 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2910 break; 2911 case 15: 2912 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2913 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2914 break; 2915 default: 2916 B->ops->mult = MatMult_SeqBAIJ_N; 2917 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2918 break; 2919 } 2920 } 2921 B->ops->sor = MatSOR_SeqBAIJ; 2922 b->mbs = mbs; 2923 b->nbs = nbs; 2924 if (!skipallocation) { 2925 if (!b->imax) { 2926 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2927 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2928 2929 b->free_imax_ilen = PETSC_TRUE; 2930 } 2931 /* b->ilen will count nonzeros in each block row so far. */ 2932 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2933 if (!nnz) { 2934 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2935 else if (nz < 0) nz = 1; 2936 for (i=0; i<mbs; i++) b->imax[i] = nz; 2937 nz = nz*mbs; 2938 } else { 2939 nz = 0; 2940 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2941 } 2942 2943 /* allocate the matrix space */ 2944 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2945 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2946 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2947 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2948 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2949 2950 b->singlemalloc = PETSC_TRUE; 2951 b->i[0] = 0; 2952 for (i=1; i<mbs+1; i++) { 2953 b->i[i] = b->i[i-1] + b->imax[i-1]; 2954 } 2955 b->free_a = PETSC_TRUE; 2956 b->free_ij = PETSC_TRUE; 2957 } else { 2958 b->free_a = PETSC_FALSE; 2959 b->free_ij = PETSC_FALSE; 2960 } 2961 2962 b->bs2 = bs2; 2963 b->mbs = mbs; 2964 b->nz = 0; 2965 b->maxnz = nz; 2966 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2967 B->was_assembled = PETSC_FALSE; 2968 B->assembled = PETSC_FALSE; 2969 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2970 PetscFunctionReturn(0); 2971 } 2972 2973 #undef __FUNCT__ 2974 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2975 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2976 { 2977 PetscInt i,m,nz,nz_max=0,*nnz; 2978 PetscScalar *values=0; 2979 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2980 PetscErrorCode ierr; 2981 2982 PetscFunctionBegin; 2983 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2984 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2985 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2986 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2987 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2988 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2989 m = B->rmap->n/bs; 2990 2991 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2992 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2993 for (i=0; i<m; i++) { 2994 nz = ii[i+1]- ii[i]; 2995 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2996 nz_max = PetscMax(nz_max, nz); 2997 nnz[i] = nz; 2998 } 2999 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 3000 ierr = PetscFree(nnz);CHKERRQ(ierr); 3001 3002 values = (PetscScalar*)V; 3003 if (!values) { 3004 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 3005 } 3006 for (i=0; i<m; i++) { 3007 PetscInt ncols = ii[i+1] - ii[i]; 3008 const PetscInt *icols = jj + ii[i]; 3009 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 3010 if (!roworiented) { 3011 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 3012 } else { 3013 PetscInt j; 3014 for (j=0; j<ncols; j++) { 3015 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 3016 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 3017 } 3018 } 3019 } 3020 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3021 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3022 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3023 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3024 PetscFunctionReturn(0); 3025 } 3026 3027 /*MC 3028 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3029 block sparse compressed row format. 3030 3031 Options Database Keys: 3032 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3033 3034 Level: beginner 3035 3036 .seealso: MatCreateSeqBAIJ() 3037 M*/ 3038 3039 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3040 3041 #undef __FUNCT__ 3042 #define __FUNCT__ "MatCreate_SeqBAIJ" 3043 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3044 { 3045 PetscErrorCode ierr; 3046 PetscMPIInt size; 3047 Mat_SeqBAIJ *b; 3048 3049 PetscFunctionBegin; 3050 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3051 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3052 3053 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3054 B->data = (void*)b; 3055 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3056 3057 b->row = 0; 3058 b->col = 0; 3059 b->icol = 0; 3060 b->reallocs = 0; 3061 b->saved_values = 0; 3062 3063 b->roworiented = PETSC_TRUE; 3064 b->nonew = 0; 3065 b->diag = 0; 3066 B->spptr = 0; 3067 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3068 b->keepnonzeropattern = PETSC_FALSE; 3069 3070 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3071 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3072 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3073 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3074 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3075 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3076 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3077 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3078 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3079 #if defined(PETSC_HAVE_HYPRE) 3080 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr); 3081 #endif 3082 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3083 PetscFunctionReturn(0); 3084 } 3085 3086 #undef __FUNCT__ 3087 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3088 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3089 { 3090 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3091 PetscErrorCode ierr; 3092 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3093 3094 PetscFunctionBegin; 3095 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3096 3097 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3098 c->imax = a->imax; 3099 c->ilen = a->ilen; 3100 c->free_imax_ilen = PETSC_FALSE; 3101 } else { 3102 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3103 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3104 for (i=0; i<mbs; i++) { 3105 c->imax[i] = a->imax[i]; 3106 c->ilen[i] = a->ilen[i]; 3107 } 3108 c->free_imax_ilen = PETSC_TRUE; 3109 } 3110 3111 /* allocate the matrix space */ 3112 if (mallocmatspace) { 3113 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3114 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3115 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3116 3117 c->i = a->i; 3118 c->j = a->j; 3119 c->singlemalloc = PETSC_FALSE; 3120 c->free_a = PETSC_TRUE; 3121 c->free_ij = PETSC_FALSE; 3122 c->parent = A; 3123 C->preallocated = PETSC_TRUE; 3124 C->assembled = PETSC_TRUE; 3125 3126 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3127 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3128 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3129 } else { 3130 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3131 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3132 3133 c->singlemalloc = PETSC_TRUE; 3134 c->free_a = PETSC_TRUE; 3135 c->free_ij = PETSC_TRUE; 3136 3137 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3138 if (mbs > 0) { 3139 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3140 if (cpvalues == MAT_COPY_VALUES) { 3141 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3142 } else { 3143 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3144 } 3145 } 3146 C->preallocated = PETSC_TRUE; 3147 C->assembled = PETSC_TRUE; 3148 } 3149 } 3150 3151 c->roworiented = a->roworiented; 3152 c->nonew = a->nonew; 3153 3154 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3155 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3156 3157 c->bs2 = a->bs2; 3158 c->mbs = a->mbs; 3159 c->nbs = a->nbs; 3160 3161 if (a->diag) { 3162 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3163 c->diag = a->diag; 3164 c->free_diag = PETSC_FALSE; 3165 } else { 3166 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3167 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3168 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3169 c->free_diag = PETSC_TRUE; 3170 } 3171 } else c->diag = 0; 3172 3173 c->nz = a->nz; 3174 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3175 c->solve_work = NULL; 3176 c->mult_work = NULL; 3177 c->sor_workt = NULL; 3178 c->sor_work = NULL; 3179 3180 c->compressedrow.use = a->compressedrow.use; 3181 c->compressedrow.nrows = a->compressedrow.nrows; 3182 if (a->compressedrow.use) { 3183 i = a->compressedrow.nrows; 3184 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3185 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3186 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3187 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3188 } else { 3189 c->compressedrow.use = PETSC_FALSE; 3190 c->compressedrow.i = NULL; 3191 c->compressedrow.rindex = NULL; 3192 } 3193 C->nonzerostate = A->nonzerostate; 3194 3195 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3196 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3197 PetscFunctionReturn(0); 3198 } 3199 3200 #undef __FUNCT__ 3201 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3202 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3203 { 3204 PetscErrorCode ierr; 3205 3206 PetscFunctionBegin; 3207 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3208 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3209 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3210 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3211 PetscFunctionReturn(0); 3212 } 3213 3214 #undef __FUNCT__ 3215 #define __FUNCT__ "MatLoad_SeqBAIJ" 3216 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3217 { 3218 Mat_SeqBAIJ *a; 3219 PetscErrorCode ierr; 3220 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3221 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3222 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3223 PetscInt *masked,nmask,tmp,bs2,ishift; 3224 PetscMPIInt size; 3225 int fd; 3226 PetscScalar *aa; 3227 MPI_Comm comm; 3228 3229 PetscFunctionBegin; 3230 /* force binary viewer to load .info file if it has not yet done so */ 3231 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3232 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3233 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3234 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3235 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3236 if (bs < 0) bs = 1; 3237 bs2 = bs*bs; 3238 3239 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3240 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3241 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3242 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3243 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3244 M = header[1]; N = header[2]; nz = header[3]; 3245 3246 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3247 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3248 3249 /* 3250 This code adds extra rows to make sure the number of rows is 3251 divisible by the blocksize 3252 */ 3253 mbs = M/bs; 3254 extra_rows = bs - M + bs*(mbs); 3255 if (extra_rows == bs) extra_rows = 0; 3256 else mbs++; 3257 if (extra_rows) { 3258 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3259 } 3260 3261 /* Set global sizes if not already set */ 3262 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3263 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3264 } else { /* Check if the matrix global sizes are correct */ 3265 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3266 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3267 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3268 } 3269 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3270 } 3271 3272 /* read in row lengths */ 3273 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3274 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3275 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3276 3277 /* read in column indices */ 3278 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3279 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3280 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3281 3282 /* loop over row lengths determining block row lengths */ 3283 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3284 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3285 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3286 rowcount = 0; 3287 nzcount = 0; 3288 for (i=0; i<mbs; i++) { 3289 nmask = 0; 3290 for (j=0; j<bs; j++) { 3291 kmax = rowlengths[rowcount]; 3292 for (k=0; k<kmax; k++) { 3293 tmp = jj[nzcount++]/bs; 3294 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3295 } 3296 rowcount++; 3297 } 3298 browlengths[i] += nmask; 3299 /* zero out the mask elements we set */ 3300 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3301 } 3302 3303 /* Do preallocation */ 3304 ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr); 3305 a = (Mat_SeqBAIJ*)newmat->data; 3306 3307 /* set matrix "i" values */ 3308 a->i[0] = 0; 3309 for (i=1; i<= mbs; i++) { 3310 a->i[i] = a->i[i-1] + browlengths[i-1]; 3311 a->ilen[i-1] = browlengths[i-1]; 3312 } 3313 a->nz = 0; 3314 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3315 3316 /* read in nonzero values */ 3317 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3318 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3319 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3320 3321 /* set "a" and "j" values into matrix */ 3322 nzcount = 0; jcount = 0; 3323 for (i=0; i<mbs; i++) { 3324 nzcountb = nzcount; 3325 nmask = 0; 3326 for (j=0; j<bs; j++) { 3327 kmax = rowlengths[i*bs+j]; 3328 for (k=0; k<kmax; k++) { 3329 tmp = jj[nzcount++]/bs; 3330 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3331 } 3332 } 3333 /* sort the masked values */ 3334 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3335 3336 /* set "j" values into matrix */ 3337 maskcount = 1; 3338 for (j=0; j<nmask; j++) { 3339 a->j[jcount++] = masked[j]; 3340 mask[masked[j]] = maskcount++; 3341 } 3342 /* set "a" values into matrix */ 3343 ishift = bs2*a->i[i]; 3344 for (j=0; j<bs; j++) { 3345 kmax = rowlengths[i*bs+j]; 3346 for (k=0; k<kmax; k++) { 3347 tmp = jj[nzcountb]/bs; 3348 block = mask[tmp] - 1; 3349 point = jj[nzcountb] - bs*tmp; 3350 idx = ishift + bs2*block + j + bs*point; 3351 a->a[idx] = (MatScalar)aa[nzcountb++]; 3352 } 3353 } 3354 /* zero out the mask elements we set */ 3355 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3356 } 3357 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3358 3359 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3360 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3361 ierr = PetscFree(aa);CHKERRQ(ierr); 3362 ierr = PetscFree(jj);CHKERRQ(ierr); 3363 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3364 3365 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3366 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3367 PetscFunctionReturn(0); 3368 } 3369 3370 #undef __FUNCT__ 3371 #define __FUNCT__ "MatCreateSeqBAIJ" 3372 /*@C 3373 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3374 compressed row) format. For good matrix assembly performance the 3375 user should preallocate the matrix storage by setting the parameter nz 3376 (or the array nnz). By setting these parameters accurately, performance 3377 during matrix assembly can be increased by more than a factor of 50. 3378 3379 Collective on MPI_Comm 3380 3381 Input Parameters: 3382 + comm - MPI communicator, set to PETSC_COMM_SELF 3383 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3384 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3385 . m - number of rows 3386 . n - number of columns 3387 . nz - number of nonzero blocks per block row (same for all rows) 3388 - nnz - array containing the number of nonzero blocks in the various block rows 3389 (possibly different for each block row) or NULL 3390 3391 Output Parameter: 3392 . A - the matrix 3393 3394 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3395 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3396 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3397 3398 Options Database Keys: 3399 . -mat_no_unroll - uses code that does not unroll the loops in the 3400 block calculations (much slower) 3401 . -mat_block_size - size of the blocks to use 3402 3403 Level: intermediate 3404 3405 Notes: 3406 The number of rows and columns must be divisible by blocksize. 3407 3408 If the nnz parameter is given then the nz parameter is ignored 3409 3410 A nonzero block is any block that as 1 or more nonzeros in it 3411 3412 The block AIJ format is fully compatible with standard Fortran 77 3413 storage. That is, the stored row and column indices can begin at 3414 either one (as in Fortran) or zero. See the users' manual for details. 3415 3416 Specify the preallocated storage with either nz or nnz (not both). 3417 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3418 allocation. See Users-Manual: ch_mat for details. 3419 matrices. 3420 3421 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3422 @*/ 3423 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3424 { 3425 PetscErrorCode ierr; 3426 3427 PetscFunctionBegin; 3428 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3429 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3430 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3431 ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3432 PetscFunctionReturn(0); 3433 } 3434 3435 #undef __FUNCT__ 3436 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3437 /*@C 3438 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3439 per row in the matrix. For good matrix assembly performance the 3440 user should preallocate the matrix storage by setting the parameter nz 3441 (or the array nnz). By setting these parameters accurately, performance 3442 during matrix assembly can be increased by more than a factor of 50. 3443 3444 Collective on MPI_Comm 3445 3446 Input Parameters: 3447 + B - the matrix 3448 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3449 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3450 . nz - number of block nonzeros per block row (same for all rows) 3451 - nnz - array containing the number of block nonzeros in the various block rows 3452 (possibly different for each block row) or NULL 3453 3454 Options Database Keys: 3455 . -mat_no_unroll - uses code that does not unroll the loops in the 3456 block calculations (much slower) 3457 . -mat_block_size - size of the blocks to use 3458 3459 Level: intermediate 3460 3461 Notes: 3462 If the nnz parameter is given then the nz parameter is ignored 3463 3464 You can call MatGetInfo() to get information on how effective the preallocation was; 3465 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3466 You can also run with the option -info and look for messages with the string 3467 malloc in them to see if additional memory allocation was needed. 3468 3469 The block AIJ format is fully compatible with standard Fortran 77 3470 storage. That is, the stored row and column indices can begin at 3471 either one (as in Fortran) or zero. See the users' manual for details. 3472 3473 Specify the preallocated storage with either nz or nnz (not both). 3474 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3475 allocation. See Users-Manual: ch_mat for details. 3476 3477 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3478 @*/ 3479 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3480 { 3481 PetscErrorCode ierr; 3482 3483 PetscFunctionBegin; 3484 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3485 PetscValidType(B,1); 3486 PetscValidLogicalCollectiveInt(B,bs,2); 3487 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3488 PetscFunctionReturn(0); 3489 } 3490 3491 #undef __FUNCT__ 3492 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3493 /*@C 3494 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3495 (the default sequential PETSc format). 3496 3497 Collective on MPI_Comm 3498 3499 Input Parameters: 3500 + B - the matrix 3501 . i - the indices into j for the start of each local row (starts with zero) 3502 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3503 - v - optional values in the matrix 3504 3505 Level: developer 3506 3507 Notes: 3508 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3509 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3510 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3511 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3512 block column and the second index is over columns within a block. 3513 3514 .keywords: matrix, aij, compressed row, sparse 3515 3516 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3517 @*/ 3518 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3519 { 3520 PetscErrorCode ierr; 3521 3522 PetscFunctionBegin; 3523 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3524 PetscValidType(B,1); 3525 PetscValidLogicalCollectiveInt(B,bs,2); 3526 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3527 PetscFunctionReturn(0); 3528 } 3529 3530 3531 #undef __FUNCT__ 3532 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3533 /*@ 3534 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3535 3536 Collective on MPI_Comm 3537 3538 Input Parameters: 3539 + comm - must be an MPI communicator of size 1 3540 . bs - size of block 3541 . m - number of rows 3542 . n - number of columns 3543 . i - row indices 3544 . j - column indices 3545 - a - matrix values 3546 3547 Output Parameter: 3548 . mat - the matrix 3549 3550 Level: advanced 3551 3552 Notes: 3553 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3554 once the matrix is destroyed 3555 3556 You cannot set new nonzero locations into this matrix, that will generate an error. 3557 3558 The i and j indices are 0 based 3559 3560 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3561 3562 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3563 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3564 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3565 with column-major ordering within blocks. 3566 3567 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3568 3569 @*/ 3570 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3571 { 3572 PetscErrorCode ierr; 3573 PetscInt ii; 3574 Mat_SeqBAIJ *baij; 3575 3576 PetscFunctionBegin; 3577 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3578 if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3579 3580 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3581 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3582 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3583 ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3584 baij = (Mat_SeqBAIJ*)(*mat)->data; 3585 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3586 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3587 3588 baij->i = i; 3589 baij->j = j; 3590 baij->a = a; 3591 3592 baij->singlemalloc = PETSC_FALSE; 3593 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3594 baij->free_a = PETSC_FALSE; 3595 baij->free_ij = PETSC_FALSE; 3596 3597 for (ii=0; ii<m; ii++) { 3598 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3599 #if defined(PETSC_USE_DEBUG) 3600 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3601 #endif 3602 } 3603 #if defined(PETSC_USE_DEBUG) 3604 for (ii=0; ii<baij->i[m]; ii++) { 3605 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3606 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3607 } 3608 #endif 3609 3610 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3611 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614 3615 #undef __FUNCT__ 3616 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3617 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3618 { 3619 PetscErrorCode ierr; 3620 3621 PetscFunctionBegin; 3622 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3623 PetscFunctionReturn(0); 3624 } 3625