1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /*MC 133 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 134 multicomponent problems, interpolating or restricting each component the same way independently. 135 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 136 137 Operations provided: 138 . MatMult 139 . MatMultTranspose 140 . MatMultAdd 141 . MatMultTransposeAdd 142 143 Level: advanced 144 145 .seealso: MatCreateSeqDense 146 M*/ 147 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatCreate_MAIJ" 151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_MPIMAIJ *b; 155 PetscMPIInt size; 156 157 PetscFunctionBegin; 158 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159 A->data = (void*)b; 160 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161 A->factor = 0; 162 A->mapping = 0; 163 164 b->AIJ = 0; 165 b->dof = 0; 166 b->OAIJ = 0; 167 b->ctx = 0; 168 b->w = 0; 169 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 170 if (size == 1){ 171 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 172 } else { 173 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 174 } 175 PetscFunctionReturn(0); 176 } 177 EXTERN_C_END 178 179 /* --------------------------------------------------------------------------------------*/ 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_SeqMAIJ_2" 182 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 186 PetscScalar *x,*y,*v,sum1, sum2; 187 PetscErrorCode ierr; 188 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 189 PetscInt n,i,jrow,j; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 194 idx = a->j; 195 v = a->a; 196 ii = a->i; 197 198 for (i=0; i<m; i++) { 199 jrow = ii[i]; 200 n = ii[i+1] - jrow; 201 sum1 = 0.0; 202 sum2 = 0.0; 203 nonzerorow += (n>0); 204 for (j=0; j<n; j++) { 205 sum1 += v[jrow]*x[2*idx[jrow]]; 206 sum2 += v[jrow]*x[2*idx[jrow]+1]; 207 jrow++; 208 } 209 y[2*i] = sum1; 210 y[2*i+1] = sum2; 211 } 212 213 ierr = PetscLogFlops(4*a->nz - 2*nonzerorow);CHKERRQ(ierr); 214 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 215 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 216 PetscFunctionReturn(0); 217 } 218 219 #undef __FUNCT__ 220 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 221 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 222 { 223 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 224 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 225 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 226 PetscErrorCode ierr; 227 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 228 229 PetscFunctionBegin; 230 ierr = VecSet(yy,zero);CHKERRQ(ierr); 231 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 232 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 233 234 for (i=0; i<m; i++) { 235 idx = a->j + a->i[i] ; 236 v = a->a + a->i[i] ; 237 n = a->i[i+1] - a->i[i]; 238 alpha1 = x[2*i]; 239 alpha2 = x[2*i+1]; 240 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 241 } 242 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 243 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 244 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 #undef __FUNCT__ 249 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 250 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 251 { 252 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 253 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 254 PetscScalar *x,*y,*v,sum1, sum2; 255 PetscErrorCode ierr; 256 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 257 PetscInt n,i,jrow,j; 258 259 PetscFunctionBegin; 260 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 261 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 262 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 263 idx = a->j; 264 v = a->a; 265 ii = a->i; 266 267 for (i=0; i<m; i++) { 268 jrow = ii[i]; 269 n = ii[i+1] - jrow; 270 sum1 = 0.0; 271 sum2 = 0.0; 272 for (j=0; j<n; j++) { 273 sum1 += v[jrow]*x[2*idx[jrow]]; 274 sum2 += v[jrow]*x[2*idx[jrow]+1]; 275 jrow++; 276 } 277 y[2*i] += sum1; 278 y[2*i+1] += sum2; 279 } 280 281 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 282 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 283 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 #undef __FUNCT__ 287 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 288 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 289 { 290 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 292 PetscScalar *x,*y,*v,alpha1,alpha2; 293 PetscErrorCode ierr; 294 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 295 296 PetscFunctionBegin; 297 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 298 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 299 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 300 301 for (i=0; i<m; i++) { 302 idx = a->j + a->i[i] ; 303 v = a->a + a->i[i] ; 304 n = a->i[i+1] - a->i[i]; 305 alpha1 = x[2*i]; 306 alpha2 = x[2*i+1]; 307 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 308 } 309 ierr = PetscLogFlops(4*a->nz);CHKERRQ(ierr); 310 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 311 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 312 PetscFunctionReturn(0); 313 } 314 /* --------------------------------------------------------------------------------------*/ 315 #undef __FUNCT__ 316 #define __FUNCT__ "MatMult_SeqMAIJ_3" 317 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 318 { 319 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 320 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 321 PetscScalar *x,*y,*v,sum1, sum2, sum3; 322 PetscErrorCode ierr; 323 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 324 PetscInt n,i,jrow,j; 325 326 PetscFunctionBegin; 327 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 328 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 329 idx = a->j; 330 v = a->a; 331 ii = a->i; 332 333 for (i=0; i<m; i++) { 334 jrow = ii[i]; 335 n = ii[i+1] - jrow; 336 sum1 = 0.0; 337 sum2 = 0.0; 338 sum3 = 0.0; 339 nonzerorow += (n>0); 340 for (j=0; j<n; j++) { 341 sum1 += v[jrow]*x[3*idx[jrow]]; 342 sum2 += v[jrow]*x[3*idx[jrow]+1]; 343 sum3 += v[jrow]*x[3*idx[jrow]+2]; 344 jrow++; 345 } 346 y[3*i] = sum1; 347 y[3*i+1] = sum2; 348 y[3*i+2] = sum3; 349 } 350 351 ierr = PetscLogFlops(6*a->nz - 3*nonzerorow);CHKERRQ(ierr); 352 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 353 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 354 PetscFunctionReturn(0); 355 } 356 357 #undef __FUNCT__ 358 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 359 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 360 { 361 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 362 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 363 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 364 PetscErrorCode ierr; 365 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 366 367 PetscFunctionBegin; 368 ierr = VecSet(yy,zero);CHKERRQ(ierr); 369 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 370 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 371 372 for (i=0; i<m; i++) { 373 idx = a->j + a->i[i]; 374 v = a->a + a->i[i]; 375 n = a->i[i+1] - a->i[i]; 376 alpha1 = x[3*i]; 377 alpha2 = x[3*i+1]; 378 alpha3 = x[3*i+2]; 379 while (n-->0) { 380 y[3*(*idx)] += alpha1*(*v); 381 y[3*(*idx)+1] += alpha2*(*v); 382 y[3*(*idx)+2] += alpha3*(*v); 383 idx++; v++; 384 } 385 } 386 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 387 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 388 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 389 PetscFunctionReturn(0); 390 } 391 392 #undef __FUNCT__ 393 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 394 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 395 { 396 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 397 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 398 PetscScalar *x,*y,*v,sum1, sum2, sum3; 399 PetscErrorCode ierr; 400 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 401 PetscInt n,i,jrow,j; 402 403 PetscFunctionBegin; 404 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 405 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 406 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 407 idx = a->j; 408 v = a->a; 409 ii = a->i; 410 411 for (i=0; i<m; i++) { 412 jrow = ii[i]; 413 n = ii[i+1] - jrow; 414 sum1 = 0.0; 415 sum2 = 0.0; 416 sum3 = 0.0; 417 for (j=0; j<n; j++) { 418 sum1 += v[jrow]*x[3*idx[jrow]]; 419 sum2 += v[jrow]*x[3*idx[jrow]+1]; 420 sum3 += v[jrow]*x[3*idx[jrow]+2]; 421 jrow++; 422 } 423 y[3*i] += sum1; 424 y[3*i+1] += sum2; 425 y[3*i+2] += sum3; 426 } 427 428 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 429 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 430 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 431 PetscFunctionReturn(0); 432 } 433 #undef __FUNCT__ 434 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 435 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 436 { 437 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 438 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 439 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 440 PetscErrorCode ierr; 441 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 442 443 PetscFunctionBegin; 444 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 445 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 446 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 447 for (i=0; i<m; i++) { 448 idx = a->j + a->i[i] ; 449 v = a->a + a->i[i] ; 450 n = a->i[i+1] - a->i[i]; 451 alpha1 = x[3*i]; 452 alpha2 = x[3*i+1]; 453 alpha3 = x[3*i+2]; 454 while (n-->0) { 455 y[3*(*idx)] += alpha1*(*v); 456 y[3*(*idx)+1] += alpha2*(*v); 457 y[3*(*idx)+2] += alpha3*(*v); 458 idx++; v++; 459 } 460 } 461 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 462 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 463 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 464 PetscFunctionReturn(0); 465 } 466 467 /* ------------------------------------------------------------------------------*/ 468 #undef __FUNCT__ 469 #define __FUNCT__ "MatMult_SeqMAIJ_4" 470 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 471 { 472 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 473 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 474 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 475 PetscErrorCode ierr; 476 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 477 PetscInt n,i,jrow,j; 478 479 PetscFunctionBegin; 480 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 481 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 482 idx = a->j; 483 v = a->a; 484 ii = a->i; 485 486 for (i=0; i<m; i++) { 487 jrow = ii[i]; 488 n = ii[i+1] - jrow; 489 sum1 = 0.0; 490 sum2 = 0.0; 491 sum3 = 0.0; 492 sum4 = 0.0; 493 nonzerorow += (n>0); 494 for (j=0; j<n; j++) { 495 sum1 += v[jrow]*x[4*idx[jrow]]; 496 sum2 += v[jrow]*x[4*idx[jrow]+1]; 497 sum3 += v[jrow]*x[4*idx[jrow]+2]; 498 sum4 += v[jrow]*x[4*idx[jrow]+3]; 499 jrow++; 500 } 501 y[4*i] = sum1; 502 y[4*i+1] = sum2; 503 y[4*i+2] = sum3; 504 y[4*i+3] = sum4; 505 } 506 507 ierr = PetscLogFlops(8*a->nz - 4*nonzerorow);CHKERRQ(ierr); 508 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 509 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 510 PetscFunctionReturn(0); 511 } 512 513 #undef __FUNCT__ 514 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 515 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 516 { 517 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 518 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 519 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 520 PetscErrorCode ierr; 521 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 522 523 PetscFunctionBegin; 524 ierr = VecSet(yy,zero);CHKERRQ(ierr); 525 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 526 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 527 for (i=0; i<m; i++) { 528 idx = a->j + a->i[i] ; 529 v = a->a + a->i[i] ; 530 n = a->i[i+1] - a->i[i]; 531 alpha1 = x[4*i]; 532 alpha2 = x[4*i+1]; 533 alpha3 = x[4*i+2]; 534 alpha4 = x[4*i+3]; 535 while (n-->0) { 536 y[4*(*idx)] += alpha1*(*v); 537 y[4*(*idx)+1] += alpha2*(*v); 538 y[4*(*idx)+2] += alpha3*(*v); 539 y[4*(*idx)+3] += alpha4*(*v); 540 idx++; v++; 541 } 542 } 543 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 544 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 545 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 546 PetscFunctionReturn(0); 547 } 548 549 #undef __FUNCT__ 550 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 551 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 552 { 553 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 555 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 556 PetscErrorCode ierr; 557 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 558 PetscInt n,i,jrow,j; 559 560 PetscFunctionBegin; 561 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 562 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 563 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 564 idx = a->j; 565 v = a->a; 566 ii = a->i; 567 568 for (i=0; i<m; i++) { 569 jrow = ii[i]; 570 n = ii[i+1] - jrow; 571 sum1 = 0.0; 572 sum2 = 0.0; 573 sum3 = 0.0; 574 sum4 = 0.0; 575 for (j=0; j<n; j++) { 576 sum1 += v[jrow]*x[4*idx[jrow]]; 577 sum2 += v[jrow]*x[4*idx[jrow]+1]; 578 sum3 += v[jrow]*x[4*idx[jrow]+2]; 579 sum4 += v[jrow]*x[4*idx[jrow]+3]; 580 jrow++; 581 } 582 y[4*i] += sum1; 583 y[4*i+1] += sum2; 584 y[4*i+2] += sum3; 585 y[4*i+3] += sum4; 586 } 587 588 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 589 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 590 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 591 PetscFunctionReturn(0); 592 } 593 #undef __FUNCT__ 594 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 595 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 596 { 597 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 598 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 599 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 600 PetscErrorCode ierr; 601 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 602 603 PetscFunctionBegin; 604 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 605 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 606 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 607 608 for (i=0; i<m; i++) { 609 idx = a->j + a->i[i] ; 610 v = a->a + a->i[i] ; 611 n = a->i[i+1] - a->i[i]; 612 alpha1 = x[4*i]; 613 alpha2 = x[4*i+1]; 614 alpha3 = x[4*i+2]; 615 alpha4 = x[4*i+3]; 616 while (n-->0) { 617 y[4*(*idx)] += alpha1*(*v); 618 y[4*(*idx)+1] += alpha2*(*v); 619 y[4*(*idx)+2] += alpha3*(*v); 620 y[4*(*idx)+3] += alpha4*(*v); 621 idx++; v++; 622 } 623 } 624 ierr = PetscLogFlops(8*a->nz);CHKERRQ(ierr); 625 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 626 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 627 PetscFunctionReturn(0); 628 } 629 /* ------------------------------------------------------------------------------*/ 630 631 #undef __FUNCT__ 632 #define __FUNCT__ "MatMult_SeqMAIJ_5" 633 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 634 { 635 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 636 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 637 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 638 PetscErrorCode ierr; 639 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 640 PetscInt n,i,jrow,j; 641 642 PetscFunctionBegin; 643 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 644 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 645 idx = a->j; 646 v = a->a; 647 ii = a->i; 648 649 for (i=0; i<m; i++) { 650 jrow = ii[i]; 651 n = ii[i+1] - jrow; 652 sum1 = 0.0; 653 sum2 = 0.0; 654 sum3 = 0.0; 655 sum4 = 0.0; 656 sum5 = 0.0; 657 nonzerorow += (n>0); 658 for (j=0; j<n; j++) { 659 sum1 += v[jrow]*x[5*idx[jrow]]; 660 sum2 += v[jrow]*x[5*idx[jrow]+1]; 661 sum3 += v[jrow]*x[5*idx[jrow]+2]; 662 sum4 += v[jrow]*x[5*idx[jrow]+3]; 663 sum5 += v[jrow]*x[5*idx[jrow]+4]; 664 jrow++; 665 } 666 y[5*i] = sum1; 667 y[5*i+1] = sum2; 668 y[5*i+2] = sum3; 669 y[5*i+3] = sum4; 670 y[5*i+4] = sum5; 671 } 672 673 ierr = PetscLogFlops(10*a->nz - 5*nonzerorow);CHKERRQ(ierr); 674 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 675 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 676 PetscFunctionReturn(0); 677 } 678 679 #undef __FUNCT__ 680 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 681 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 682 { 683 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 684 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 685 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 686 PetscErrorCode ierr; 687 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 688 689 PetscFunctionBegin; 690 ierr = VecSet(yy,zero);CHKERRQ(ierr); 691 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 692 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 693 694 for (i=0; i<m; i++) { 695 idx = a->j + a->i[i] ; 696 v = a->a + a->i[i] ; 697 n = a->i[i+1] - a->i[i]; 698 alpha1 = x[5*i]; 699 alpha2 = x[5*i+1]; 700 alpha3 = x[5*i+2]; 701 alpha4 = x[5*i+3]; 702 alpha5 = x[5*i+4]; 703 while (n-->0) { 704 y[5*(*idx)] += alpha1*(*v); 705 y[5*(*idx)+1] += alpha2*(*v); 706 y[5*(*idx)+2] += alpha3*(*v); 707 y[5*(*idx)+3] += alpha4*(*v); 708 y[5*(*idx)+4] += alpha5*(*v); 709 idx++; v++; 710 } 711 } 712 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 713 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 714 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 715 PetscFunctionReturn(0); 716 } 717 718 #undef __FUNCT__ 719 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 720 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 721 { 722 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 723 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 724 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 725 PetscErrorCode ierr; 726 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 727 PetscInt n,i,jrow,j; 728 729 PetscFunctionBegin; 730 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 731 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 732 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 733 idx = a->j; 734 v = a->a; 735 ii = a->i; 736 737 for (i=0; i<m; i++) { 738 jrow = ii[i]; 739 n = ii[i+1] - jrow; 740 sum1 = 0.0; 741 sum2 = 0.0; 742 sum3 = 0.0; 743 sum4 = 0.0; 744 sum5 = 0.0; 745 for (j=0; j<n; j++) { 746 sum1 += v[jrow]*x[5*idx[jrow]]; 747 sum2 += v[jrow]*x[5*idx[jrow]+1]; 748 sum3 += v[jrow]*x[5*idx[jrow]+2]; 749 sum4 += v[jrow]*x[5*idx[jrow]+3]; 750 sum5 += v[jrow]*x[5*idx[jrow]+4]; 751 jrow++; 752 } 753 y[5*i] += sum1; 754 y[5*i+1] += sum2; 755 y[5*i+2] += sum3; 756 y[5*i+3] += sum4; 757 y[5*i+4] += sum5; 758 } 759 760 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 761 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 762 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 763 PetscFunctionReturn(0); 764 } 765 766 #undef __FUNCT__ 767 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 768 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 769 { 770 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 771 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 772 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 773 PetscErrorCode ierr; 774 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 775 776 PetscFunctionBegin; 777 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 778 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 779 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 780 781 for (i=0; i<m; i++) { 782 idx = a->j + a->i[i] ; 783 v = a->a + a->i[i] ; 784 n = a->i[i+1] - a->i[i]; 785 alpha1 = x[5*i]; 786 alpha2 = x[5*i+1]; 787 alpha3 = x[5*i+2]; 788 alpha4 = x[5*i+3]; 789 alpha5 = x[5*i+4]; 790 while (n-->0) { 791 y[5*(*idx)] += alpha1*(*v); 792 y[5*(*idx)+1] += alpha2*(*v); 793 y[5*(*idx)+2] += alpha3*(*v); 794 y[5*(*idx)+3] += alpha4*(*v); 795 y[5*(*idx)+4] += alpha5*(*v); 796 idx++; v++; 797 } 798 } 799 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 800 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 801 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 802 PetscFunctionReturn(0); 803 } 804 805 /* ------------------------------------------------------------------------------*/ 806 #undef __FUNCT__ 807 #define __FUNCT__ "MatMult_SeqMAIJ_6" 808 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 809 { 810 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 811 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 812 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 813 PetscErrorCode ierr; 814 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 815 PetscInt n,i,jrow,j; 816 817 PetscFunctionBegin; 818 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 819 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 820 idx = a->j; 821 v = a->a; 822 ii = a->i; 823 824 for (i=0; i<m; i++) { 825 jrow = ii[i]; 826 n = ii[i+1] - jrow; 827 sum1 = 0.0; 828 sum2 = 0.0; 829 sum3 = 0.0; 830 sum4 = 0.0; 831 sum5 = 0.0; 832 sum6 = 0.0; 833 nonzerorow += (n>0); 834 for (j=0; j<n; j++) { 835 sum1 += v[jrow]*x[6*idx[jrow]]; 836 sum2 += v[jrow]*x[6*idx[jrow]+1]; 837 sum3 += v[jrow]*x[6*idx[jrow]+2]; 838 sum4 += v[jrow]*x[6*idx[jrow]+3]; 839 sum5 += v[jrow]*x[6*idx[jrow]+4]; 840 sum6 += v[jrow]*x[6*idx[jrow]+5]; 841 jrow++; 842 } 843 y[6*i] = sum1; 844 y[6*i+1] = sum2; 845 y[6*i+2] = sum3; 846 y[6*i+3] = sum4; 847 y[6*i+4] = sum5; 848 y[6*i+5] = sum6; 849 } 850 851 ierr = PetscLogFlops(12*a->nz - 6*nonzerorow);CHKERRQ(ierr); 852 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 853 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 854 PetscFunctionReturn(0); 855 } 856 857 #undef __FUNCT__ 858 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 859 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 860 { 861 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 862 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 863 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 864 PetscErrorCode ierr; 865 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 866 867 PetscFunctionBegin; 868 ierr = VecSet(yy,zero);CHKERRQ(ierr); 869 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 870 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 871 872 for (i=0; i<m; i++) { 873 idx = a->j + a->i[i] ; 874 v = a->a + a->i[i] ; 875 n = a->i[i+1] - a->i[i]; 876 alpha1 = x[6*i]; 877 alpha2 = x[6*i+1]; 878 alpha3 = x[6*i+2]; 879 alpha4 = x[6*i+3]; 880 alpha5 = x[6*i+4]; 881 alpha6 = x[6*i+5]; 882 while (n-->0) { 883 y[6*(*idx)] += alpha1*(*v); 884 y[6*(*idx)+1] += alpha2*(*v); 885 y[6*(*idx)+2] += alpha3*(*v); 886 y[6*(*idx)+3] += alpha4*(*v); 887 y[6*(*idx)+4] += alpha5*(*v); 888 y[6*(*idx)+5] += alpha6*(*v); 889 idx++; v++; 890 } 891 } 892 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 893 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 894 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 895 PetscFunctionReturn(0); 896 } 897 898 #undef __FUNCT__ 899 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 900 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 901 { 902 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 903 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 904 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 905 PetscErrorCode ierr; 906 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 907 PetscInt n,i,jrow,j; 908 909 PetscFunctionBegin; 910 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 911 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 912 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 913 idx = a->j; 914 v = a->a; 915 ii = a->i; 916 917 for (i=0; i<m; i++) { 918 jrow = ii[i]; 919 n = ii[i+1] - jrow; 920 sum1 = 0.0; 921 sum2 = 0.0; 922 sum3 = 0.0; 923 sum4 = 0.0; 924 sum5 = 0.0; 925 sum6 = 0.0; 926 for (j=0; j<n; j++) { 927 sum1 += v[jrow]*x[6*idx[jrow]]; 928 sum2 += v[jrow]*x[6*idx[jrow]+1]; 929 sum3 += v[jrow]*x[6*idx[jrow]+2]; 930 sum4 += v[jrow]*x[6*idx[jrow]+3]; 931 sum5 += v[jrow]*x[6*idx[jrow]+4]; 932 sum6 += v[jrow]*x[6*idx[jrow]+5]; 933 jrow++; 934 } 935 y[6*i] += sum1; 936 y[6*i+1] += sum2; 937 y[6*i+2] += sum3; 938 y[6*i+3] += sum4; 939 y[6*i+4] += sum5; 940 y[6*i+5] += sum6; 941 } 942 943 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 944 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 945 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 946 PetscFunctionReturn(0); 947 } 948 949 #undef __FUNCT__ 950 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 951 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 952 { 953 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 954 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 955 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 956 PetscErrorCode ierr; 957 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 958 959 PetscFunctionBegin; 960 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 961 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 962 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 963 964 for (i=0; i<m; i++) { 965 idx = a->j + a->i[i] ; 966 v = a->a + a->i[i] ; 967 n = a->i[i+1] - a->i[i]; 968 alpha1 = x[6*i]; 969 alpha2 = x[6*i+1]; 970 alpha3 = x[6*i+2]; 971 alpha4 = x[6*i+3]; 972 alpha5 = x[6*i+4]; 973 alpha6 = x[6*i+5]; 974 while (n-->0) { 975 y[6*(*idx)] += alpha1*(*v); 976 y[6*(*idx)+1] += alpha2*(*v); 977 y[6*(*idx)+2] += alpha3*(*v); 978 y[6*(*idx)+3] += alpha4*(*v); 979 y[6*(*idx)+4] += alpha5*(*v); 980 y[6*(*idx)+5] += alpha6*(*v); 981 idx++; v++; 982 } 983 } 984 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 985 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 986 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 987 PetscFunctionReturn(0); 988 } 989 990 /* ------------------------------------------------------------------------------*/ 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatMult_SeqMAIJ_7" 993 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 994 { 995 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 996 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 997 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 998 PetscErrorCode ierr; 999 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1000 PetscInt n,i,jrow,j; 1001 1002 PetscFunctionBegin; 1003 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1004 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1005 idx = a->j; 1006 v = a->a; 1007 ii = a->i; 1008 1009 for (i=0; i<m; i++) { 1010 jrow = ii[i]; 1011 n = ii[i+1] - jrow; 1012 sum1 = 0.0; 1013 sum2 = 0.0; 1014 sum3 = 0.0; 1015 sum4 = 0.0; 1016 sum5 = 0.0; 1017 sum6 = 0.0; 1018 sum7 = 0.0; 1019 nonzerorow += (n>0); 1020 for (j=0; j<n; j++) { 1021 sum1 += v[jrow]*x[7*idx[jrow]]; 1022 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1023 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1024 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1025 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1026 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1027 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1028 jrow++; 1029 } 1030 y[7*i] = sum1; 1031 y[7*i+1] = sum2; 1032 y[7*i+2] = sum3; 1033 y[7*i+3] = sum4; 1034 y[7*i+4] = sum5; 1035 y[7*i+5] = sum6; 1036 y[7*i+6] = sum7; 1037 } 1038 1039 ierr = PetscLogFlops(14*a->nz - 7*nonzerorow);CHKERRQ(ierr); 1040 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1041 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1042 PetscFunctionReturn(0); 1043 } 1044 1045 #undef __FUNCT__ 1046 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1047 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1048 { 1049 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1050 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1051 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1052 PetscErrorCode ierr; 1053 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1054 1055 PetscFunctionBegin; 1056 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1057 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1058 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1059 1060 for (i=0; i<m; i++) { 1061 idx = a->j + a->i[i] ; 1062 v = a->a + a->i[i] ; 1063 n = a->i[i+1] - a->i[i]; 1064 alpha1 = x[7*i]; 1065 alpha2 = x[7*i+1]; 1066 alpha3 = x[7*i+2]; 1067 alpha4 = x[7*i+3]; 1068 alpha5 = x[7*i+4]; 1069 alpha6 = x[7*i+5]; 1070 alpha7 = x[7*i+6]; 1071 while (n-->0) { 1072 y[7*(*idx)] += alpha1*(*v); 1073 y[7*(*idx)+1] += alpha2*(*v); 1074 y[7*(*idx)+2] += alpha3*(*v); 1075 y[7*(*idx)+3] += alpha4*(*v); 1076 y[7*(*idx)+4] += alpha5*(*v); 1077 y[7*(*idx)+5] += alpha6*(*v); 1078 y[7*(*idx)+6] += alpha7*(*v); 1079 idx++; v++; 1080 } 1081 } 1082 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1083 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1084 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 #undef __FUNCT__ 1089 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1090 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1091 { 1092 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1093 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1094 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1095 PetscErrorCode ierr; 1096 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1097 PetscInt n,i,jrow,j; 1098 1099 PetscFunctionBegin; 1100 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1101 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1102 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1103 idx = a->j; 1104 v = a->a; 1105 ii = a->i; 1106 1107 for (i=0; i<m; i++) { 1108 jrow = ii[i]; 1109 n = ii[i+1] - jrow; 1110 sum1 = 0.0; 1111 sum2 = 0.0; 1112 sum3 = 0.0; 1113 sum4 = 0.0; 1114 sum5 = 0.0; 1115 sum6 = 0.0; 1116 sum7 = 0.0; 1117 for (j=0; j<n; j++) { 1118 sum1 += v[jrow]*x[7*idx[jrow]]; 1119 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1120 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1121 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1122 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1123 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1124 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1125 jrow++; 1126 } 1127 y[7*i] += sum1; 1128 y[7*i+1] += sum2; 1129 y[7*i+2] += sum3; 1130 y[7*i+3] += sum4; 1131 y[7*i+4] += sum5; 1132 y[7*i+5] += sum6; 1133 y[7*i+6] += sum7; 1134 } 1135 1136 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1137 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1138 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 #undef __FUNCT__ 1143 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1144 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1145 { 1146 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1147 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1148 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1149 PetscErrorCode ierr; 1150 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1151 1152 PetscFunctionBegin; 1153 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1154 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1155 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1156 for (i=0; i<m; i++) { 1157 idx = a->j + a->i[i] ; 1158 v = a->a + a->i[i] ; 1159 n = a->i[i+1] - a->i[i]; 1160 alpha1 = x[7*i]; 1161 alpha2 = x[7*i+1]; 1162 alpha3 = x[7*i+2]; 1163 alpha4 = x[7*i+3]; 1164 alpha5 = x[7*i+4]; 1165 alpha6 = x[7*i+5]; 1166 alpha7 = x[7*i+6]; 1167 while (n-->0) { 1168 y[7*(*idx)] += alpha1*(*v); 1169 y[7*(*idx)+1] += alpha2*(*v); 1170 y[7*(*idx)+2] += alpha3*(*v); 1171 y[7*(*idx)+3] += alpha4*(*v); 1172 y[7*(*idx)+4] += alpha5*(*v); 1173 y[7*(*idx)+5] += alpha6*(*v); 1174 y[7*(*idx)+6] += alpha7*(*v); 1175 idx++; v++; 1176 } 1177 } 1178 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1179 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1180 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1181 PetscFunctionReturn(0); 1182 } 1183 1184 #undef __FUNCT__ 1185 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1186 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1187 { 1188 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1189 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1190 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1191 PetscErrorCode ierr; 1192 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1193 PetscInt n,i,jrow,j; 1194 1195 PetscFunctionBegin; 1196 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1197 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1198 idx = a->j; 1199 v = a->a; 1200 ii = a->i; 1201 1202 for (i=0; i<m; i++) { 1203 jrow = ii[i]; 1204 n = ii[i+1] - jrow; 1205 sum1 = 0.0; 1206 sum2 = 0.0; 1207 sum3 = 0.0; 1208 sum4 = 0.0; 1209 sum5 = 0.0; 1210 sum6 = 0.0; 1211 sum7 = 0.0; 1212 sum8 = 0.0; 1213 nonzerorow += (n>0); 1214 for (j=0; j<n; j++) { 1215 sum1 += v[jrow]*x[8*idx[jrow]]; 1216 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1217 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1218 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1219 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1220 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1221 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1222 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1223 jrow++; 1224 } 1225 y[8*i] = sum1; 1226 y[8*i+1] = sum2; 1227 y[8*i+2] = sum3; 1228 y[8*i+3] = sum4; 1229 y[8*i+4] = sum5; 1230 y[8*i+5] = sum6; 1231 y[8*i+6] = sum7; 1232 y[8*i+7] = sum8; 1233 } 1234 1235 ierr = PetscLogFlops(16*a->nz - 8*nonzerorow);CHKERRQ(ierr); 1236 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1237 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1238 PetscFunctionReturn(0); 1239 } 1240 1241 #undef __FUNCT__ 1242 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1243 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1244 { 1245 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1246 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1247 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1248 PetscErrorCode ierr; 1249 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1250 1251 PetscFunctionBegin; 1252 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1253 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1254 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1255 1256 for (i=0; i<m; i++) { 1257 idx = a->j + a->i[i] ; 1258 v = a->a + a->i[i] ; 1259 n = a->i[i+1] - a->i[i]; 1260 alpha1 = x[8*i]; 1261 alpha2 = x[8*i+1]; 1262 alpha3 = x[8*i+2]; 1263 alpha4 = x[8*i+3]; 1264 alpha5 = x[8*i+4]; 1265 alpha6 = x[8*i+5]; 1266 alpha7 = x[8*i+6]; 1267 alpha8 = x[8*i+7]; 1268 while (n-->0) { 1269 y[8*(*idx)] += alpha1*(*v); 1270 y[8*(*idx)+1] += alpha2*(*v); 1271 y[8*(*idx)+2] += alpha3*(*v); 1272 y[8*(*idx)+3] += alpha4*(*v); 1273 y[8*(*idx)+4] += alpha5*(*v); 1274 y[8*(*idx)+5] += alpha6*(*v); 1275 y[8*(*idx)+6] += alpha7*(*v); 1276 y[8*(*idx)+7] += alpha8*(*v); 1277 idx++; v++; 1278 } 1279 } 1280 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1281 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1282 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1283 PetscFunctionReturn(0); 1284 } 1285 1286 #undef __FUNCT__ 1287 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1288 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1289 { 1290 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1291 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1292 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1293 PetscErrorCode ierr; 1294 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1295 PetscInt n,i,jrow,j; 1296 1297 PetscFunctionBegin; 1298 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1299 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1300 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1301 idx = a->j; 1302 v = a->a; 1303 ii = a->i; 1304 1305 for (i=0; i<m; i++) { 1306 jrow = ii[i]; 1307 n = ii[i+1] - jrow; 1308 sum1 = 0.0; 1309 sum2 = 0.0; 1310 sum3 = 0.0; 1311 sum4 = 0.0; 1312 sum5 = 0.0; 1313 sum6 = 0.0; 1314 sum7 = 0.0; 1315 sum8 = 0.0; 1316 for (j=0; j<n; j++) { 1317 sum1 += v[jrow]*x[8*idx[jrow]]; 1318 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1319 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1320 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1321 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1322 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1323 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1324 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1325 jrow++; 1326 } 1327 y[8*i] += sum1; 1328 y[8*i+1] += sum2; 1329 y[8*i+2] += sum3; 1330 y[8*i+3] += sum4; 1331 y[8*i+4] += sum5; 1332 y[8*i+5] += sum6; 1333 y[8*i+6] += sum7; 1334 y[8*i+7] += sum8; 1335 } 1336 1337 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1338 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1339 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 #undef __FUNCT__ 1344 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1345 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1346 { 1347 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1348 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1349 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1350 PetscErrorCode ierr; 1351 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1352 1353 PetscFunctionBegin; 1354 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1355 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1356 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1357 for (i=0; i<m; i++) { 1358 idx = a->j + a->i[i] ; 1359 v = a->a + a->i[i] ; 1360 n = a->i[i+1] - a->i[i]; 1361 alpha1 = x[8*i]; 1362 alpha2 = x[8*i+1]; 1363 alpha3 = x[8*i+2]; 1364 alpha4 = x[8*i+3]; 1365 alpha5 = x[8*i+4]; 1366 alpha6 = x[8*i+5]; 1367 alpha7 = x[8*i+6]; 1368 alpha8 = x[8*i+7]; 1369 while (n-->0) { 1370 y[8*(*idx)] += alpha1*(*v); 1371 y[8*(*idx)+1] += alpha2*(*v); 1372 y[8*(*idx)+2] += alpha3*(*v); 1373 y[8*(*idx)+3] += alpha4*(*v); 1374 y[8*(*idx)+4] += alpha5*(*v); 1375 y[8*(*idx)+5] += alpha6*(*v); 1376 y[8*(*idx)+6] += alpha7*(*v); 1377 y[8*(*idx)+7] += alpha8*(*v); 1378 idx++; v++; 1379 } 1380 } 1381 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1382 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1383 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1384 PetscFunctionReturn(0); 1385 } 1386 1387 /* ------------------------------------------------------------------------------*/ 1388 #undef __FUNCT__ 1389 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1390 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1391 { 1392 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1393 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1394 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1395 PetscErrorCode ierr; 1396 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1397 PetscInt n,i,jrow,j; 1398 1399 PetscFunctionBegin; 1400 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1401 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1402 idx = a->j; 1403 v = a->a; 1404 ii = a->i; 1405 1406 for (i=0; i<m; i++) { 1407 jrow = ii[i]; 1408 n = ii[i+1] - jrow; 1409 sum1 = 0.0; 1410 sum2 = 0.0; 1411 sum3 = 0.0; 1412 sum4 = 0.0; 1413 sum5 = 0.0; 1414 sum6 = 0.0; 1415 sum7 = 0.0; 1416 sum8 = 0.0; 1417 sum9 = 0.0; 1418 nonzerorow += (n>0); 1419 for (j=0; j<n; j++) { 1420 sum1 += v[jrow]*x[9*idx[jrow]]; 1421 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1422 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1423 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1424 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1425 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1426 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1427 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1428 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1429 jrow++; 1430 } 1431 y[9*i] = sum1; 1432 y[9*i+1] = sum2; 1433 y[9*i+2] = sum3; 1434 y[9*i+3] = sum4; 1435 y[9*i+4] = sum5; 1436 y[9*i+5] = sum6; 1437 y[9*i+6] = sum7; 1438 y[9*i+7] = sum8; 1439 y[9*i+8] = sum9; 1440 } 1441 1442 ierr = PetscLogFlops(18*a->nz - 9*nonzerorow);CHKERRQ(ierr); 1443 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1444 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1445 PetscFunctionReturn(0); 1446 } 1447 1448 /* ------------------------------------------------------------------------------*/ 1449 1450 #undef __FUNCT__ 1451 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1452 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1453 { 1454 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1455 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1456 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1457 PetscErrorCode ierr; 1458 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1459 1460 PetscFunctionBegin; 1461 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1462 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1463 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1464 1465 for (i=0; i<m; i++) { 1466 idx = a->j + a->i[i] ; 1467 v = a->a + a->i[i] ; 1468 n = a->i[i+1] - a->i[i]; 1469 alpha1 = x[9*i]; 1470 alpha2 = x[9*i+1]; 1471 alpha3 = x[9*i+2]; 1472 alpha4 = x[9*i+3]; 1473 alpha5 = x[9*i+4]; 1474 alpha6 = x[9*i+5]; 1475 alpha7 = x[9*i+6]; 1476 alpha8 = x[9*i+7]; 1477 alpha9 = x[9*i+8]; 1478 while (n-->0) { 1479 y[9*(*idx)] += alpha1*(*v); 1480 y[9*(*idx)+1] += alpha2*(*v); 1481 y[9*(*idx)+2] += alpha3*(*v); 1482 y[9*(*idx)+3] += alpha4*(*v); 1483 y[9*(*idx)+4] += alpha5*(*v); 1484 y[9*(*idx)+5] += alpha6*(*v); 1485 y[9*(*idx)+6] += alpha7*(*v); 1486 y[9*(*idx)+7] += alpha8*(*v); 1487 y[9*(*idx)+8] += alpha9*(*v); 1488 idx++; v++; 1489 } 1490 } 1491 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1492 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1493 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1494 PetscFunctionReturn(0); 1495 } 1496 1497 #undef __FUNCT__ 1498 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1499 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1500 { 1501 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1502 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1503 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1504 PetscErrorCode ierr; 1505 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1506 PetscInt n,i,jrow,j; 1507 1508 PetscFunctionBegin; 1509 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1510 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1511 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1512 idx = a->j; 1513 v = a->a; 1514 ii = a->i; 1515 1516 for (i=0; i<m; i++) { 1517 jrow = ii[i]; 1518 n = ii[i+1] - jrow; 1519 sum1 = 0.0; 1520 sum2 = 0.0; 1521 sum3 = 0.0; 1522 sum4 = 0.0; 1523 sum5 = 0.0; 1524 sum6 = 0.0; 1525 sum7 = 0.0; 1526 sum8 = 0.0; 1527 sum9 = 0.0; 1528 for (j=0; j<n; j++) { 1529 sum1 += v[jrow]*x[9*idx[jrow]]; 1530 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1531 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1532 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1533 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1534 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1535 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1536 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1537 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1538 jrow++; 1539 } 1540 y[9*i] += sum1; 1541 y[9*i+1] += sum2; 1542 y[9*i+2] += sum3; 1543 y[9*i+3] += sum4; 1544 y[9*i+4] += sum5; 1545 y[9*i+5] += sum6; 1546 y[9*i+6] += sum7; 1547 y[9*i+7] += sum8; 1548 y[9*i+8] += sum9; 1549 } 1550 1551 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1552 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1553 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1554 PetscFunctionReturn(0); 1555 } 1556 1557 #undef __FUNCT__ 1558 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1559 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1560 { 1561 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1562 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1563 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1564 PetscErrorCode ierr; 1565 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1566 1567 PetscFunctionBegin; 1568 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1569 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1570 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1571 for (i=0; i<m; i++) { 1572 idx = a->j + a->i[i] ; 1573 v = a->a + a->i[i] ; 1574 n = a->i[i+1] - a->i[i]; 1575 alpha1 = x[9*i]; 1576 alpha2 = x[9*i+1]; 1577 alpha3 = x[9*i+2]; 1578 alpha4 = x[9*i+3]; 1579 alpha5 = x[9*i+4]; 1580 alpha6 = x[9*i+5]; 1581 alpha7 = x[9*i+6]; 1582 alpha8 = x[9*i+7]; 1583 alpha9 = x[9*i+8]; 1584 while (n-->0) { 1585 y[9*(*idx)] += alpha1*(*v); 1586 y[9*(*idx)+1] += alpha2*(*v); 1587 y[9*(*idx)+2] += alpha3*(*v); 1588 y[9*(*idx)+3] += alpha4*(*v); 1589 y[9*(*idx)+4] += alpha5*(*v); 1590 y[9*(*idx)+5] += alpha6*(*v); 1591 y[9*(*idx)+6] += alpha7*(*v); 1592 y[9*(*idx)+7] += alpha8*(*v); 1593 y[9*(*idx)+8] += alpha9*(*v); 1594 idx++; v++; 1595 } 1596 } 1597 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1598 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1599 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1600 PetscFunctionReturn(0); 1601 } 1602 /*--------------------------------------------------------------------------------------------*/ 1603 #undef __FUNCT__ 1604 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1605 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1606 { 1607 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1608 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1609 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1610 PetscErrorCode ierr; 1611 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1612 PetscInt n,i,jrow,j; 1613 1614 PetscFunctionBegin; 1615 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1616 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1617 idx = a->j; 1618 v = a->a; 1619 ii = a->i; 1620 1621 for (i=0; i<m; i++) { 1622 jrow = ii[i]; 1623 n = ii[i+1] - jrow; 1624 sum1 = 0.0; 1625 sum2 = 0.0; 1626 sum3 = 0.0; 1627 sum4 = 0.0; 1628 sum5 = 0.0; 1629 sum6 = 0.0; 1630 sum7 = 0.0; 1631 sum8 = 0.0; 1632 sum9 = 0.0; 1633 sum10 = 0.0; 1634 nonzerorow += (n>0); 1635 for (j=0; j<n; j++) { 1636 sum1 += v[jrow]*x[10*idx[jrow]]; 1637 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1638 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1639 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1640 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1641 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1642 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1643 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1644 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1645 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1646 jrow++; 1647 } 1648 y[10*i] = sum1; 1649 y[10*i+1] = sum2; 1650 y[10*i+2] = sum3; 1651 y[10*i+3] = sum4; 1652 y[10*i+4] = sum5; 1653 y[10*i+5] = sum6; 1654 y[10*i+6] = sum7; 1655 y[10*i+7] = sum8; 1656 y[10*i+8] = sum9; 1657 y[10*i+9] = sum10; 1658 } 1659 1660 ierr = PetscLogFlops(20*a->nz - 10*nonzerorow);CHKERRQ(ierr); 1661 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1662 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1663 PetscFunctionReturn(0); 1664 } 1665 1666 #undef __FUNCT__ 1667 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1668 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1669 { 1670 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1671 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1672 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1673 PetscErrorCode ierr; 1674 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1675 PetscInt n,i,jrow,j; 1676 1677 PetscFunctionBegin; 1678 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1679 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1680 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1681 idx = a->j; 1682 v = a->a; 1683 ii = a->i; 1684 1685 for (i=0; i<m; i++) { 1686 jrow = ii[i]; 1687 n = ii[i+1] - jrow; 1688 sum1 = 0.0; 1689 sum2 = 0.0; 1690 sum3 = 0.0; 1691 sum4 = 0.0; 1692 sum5 = 0.0; 1693 sum6 = 0.0; 1694 sum7 = 0.0; 1695 sum8 = 0.0; 1696 sum9 = 0.0; 1697 sum10 = 0.0; 1698 for (j=0; j<n; j++) { 1699 sum1 += v[jrow]*x[10*idx[jrow]]; 1700 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1701 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1702 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1703 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1704 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1705 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1706 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1707 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1708 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1709 jrow++; 1710 } 1711 y[10*i] += sum1; 1712 y[10*i+1] += sum2; 1713 y[10*i+2] += sum3; 1714 y[10*i+3] += sum4; 1715 y[10*i+4] += sum5; 1716 y[10*i+5] += sum6; 1717 y[10*i+6] += sum7; 1718 y[10*i+7] += sum8; 1719 y[10*i+8] += sum9; 1720 y[10*i+9] += sum10; 1721 } 1722 1723 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1724 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1725 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1726 PetscFunctionReturn(0); 1727 } 1728 1729 #undef __FUNCT__ 1730 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1731 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1732 { 1733 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1734 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1735 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1736 PetscErrorCode ierr; 1737 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1738 1739 PetscFunctionBegin; 1740 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1741 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1742 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1743 1744 for (i=0; i<m; i++) { 1745 idx = a->j + a->i[i] ; 1746 v = a->a + a->i[i] ; 1747 n = a->i[i+1] - a->i[i]; 1748 alpha1 = x[10*i]; 1749 alpha2 = x[10*i+1]; 1750 alpha3 = x[10*i+2]; 1751 alpha4 = x[10*i+3]; 1752 alpha5 = x[10*i+4]; 1753 alpha6 = x[10*i+5]; 1754 alpha7 = x[10*i+6]; 1755 alpha8 = x[10*i+7]; 1756 alpha9 = x[10*i+8]; 1757 alpha10 = x[10*i+9]; 1758 while (n-->0) { 1759 y[10*(*idx)] += alpha1*(*v); 1760 y[10*(*idx)+1] += alpha2*(*v); 1761 y[10*(*idx)+2] += alpha3*(*v); 1762 y[10*(*idx)+3] += alpha4*(*v); 1763 y[10*(*idx)+4] += alpha5*(*v); 1764 y[10*(*idx)+5] += alpha6*(*v); 1765 y[10*(*idx)+6] += alpha7*(*v); 1766 y[10*(*idx)+7] += alpha8*(*v); 1767 y[10*(*idx)+8] += alpha9*(*v); 1768 y[10*(*idx)+9] += alpha10*(*v); 1769 idx++; v++; 1770 } 1771 } 1772 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1773 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1774 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1775 PetscFunctionReturn(0); 1776 } 1777 1778 #undef __FUNCT__ 1779 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1780 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1781 { 1782 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1783 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1784 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1785 PetscErrorCode ierr; 1786 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1787 1788 PetscFunctionBegin; 1789 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1790 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1791 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1792 for (i=0; i<m; i++) { 1793 idx = a->j + a->i[i] ; 1794 v = a->a + a->i[i] ; 1795 n = a->i[i+1] - a->i[i]; 1796 alpha1 = x[10*i]; 1797 alpha2 = x[10*i+1]; 1798 alpha3 = x[10*i+2]; 1799 alpha4 = x[10*i+3]; 1800 alpha5 = x[10*i+4]; 1801 alpha6 = x[10*i+5]; 1802 alpha7 = x[10*i+6]; 1803 alpha8 = x[10*i+7]; 1804 alpha9 = x[10*i+8]; 1805 alpha10 = x[10*i+9]; 1806 while (n-->0) { 1807 y[10*(*idx)] += alpha1*(*v); 1808 y[10*(*idx)+1] += alpha2*(*v); 1809 y[10*(*idx)+2] += alpha3*(*v); 1810 y[10*(*idx)+3] += alpha4*(*v); 1811 y[10*(*idx)+4] += alpha5*(*v); 1812 y[10*(*idx)+5] += alpha6*(*v); 1813 y[10*(*idx)+6] += alpha7*(*v); 1814 y[10*(*idx)+7] += alpha8*(*v); 1815 y[10*(*idx)+8] += alpha9*(*v); 1816 y[10*(*idx)+9] += alpha10*(*v); 1817 idx++; v++; 1818 } 1819 } 1820 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1821 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1822 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1823 PetscFunctionReturn(0); 1824 } 1825 1826 1827 /*--------------------------------------------------------------------------------------------*/ 1828 #undef __FUNCT__ 1829 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1830 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1831 { 1832 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1833 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1834 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1835 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1836 PetscErrorCode ierr; 1837 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 1838 PetscInt n,i,jrow,j; 1839 1840 PetscFunctionBegin; 1841 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1842 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1843 idx = a->j; 1844 v = a->a; 1845 ii = a->i; 1846 1847 for (i=0; i<m; i++) { 1848 jrow = ii[i]; 1849 n = ii[i+1] - jrow; 1850 sum1 = 0.0; 1851 sum2 = 0.0; 1852 sum3 = 0.0; 1853 sum4 = 0.0; 1854 sum5 = 0.0; 1855 sum6 = 0.0; 1856 sum7 = 0.0; 1857 sum8 = 0.0; 1858 sum9 = 0.0; 1859 sum10 = 0.0; 1860 sum11 = 0.0; 1861 sum12 = 0.0; 1862 sum13 = 0.0; 1863 sum14 = 0.0; 1864 sum15 = 0.0; 1865 sum16 = 0.0; 1866 nonzerorow += (n>0); 1867 for (j=0; j<n; j++) { 1868 sum1 += v[jrow]*x[16*idx[jrow]]; 1869 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1870 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1871 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1872 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1873 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1874 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1875 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1876 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1877 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1878 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1879 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1880 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1881 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1882 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1883 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1884 jrow++; 1885 } 1886 y[16*i] = sum1; 1887 y[16*i+1] = sum2; 1888 y[16*i+2] = sum3; 1889 y[16*i+3] = sum4; 1890 y[16*i+4] = sum5; 1891 y[16*i+5] = sum6; 1892 y[16*i+6] = sum7; 1893 y[16*i+7] = sum8; 1894 y[16*i+8] = sum9; 1895 y[16*i+9] = sum10; 1896 y[16*i+10] = sum11; 1897 y[16*i+11] = sum12; 1898 y[16*i+12] = sum13; 1899 y[16*i+13] = sum14; 1900 y[16*i+14] = sum15; 1901 y[16*i+15] = sum16; 1902 } 1903 1904 ierr = PetscLogFlops(32*a->nz - 16*nonzerorow);CHKERRQ(ierr); 1905 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1906 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1907 PetscFunctionReturn(0); 1908 } 1909 1910 #undef __FUNCT__ 1911 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1912 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1913 { 1914 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1915 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1916 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1917 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1918 PetscErrorCode ierr; 1919 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1920 1921 PetscFunctionBegin; 1922 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1923 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1924 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1925 1926 for (i=0; i<m; i++) { 1927 idx = a->j + a->i[i] ; 1928 v = a->a + a->i[i] ; 1929 n = a->i[i+1] - a->i[i]; 1930 alpha1 = x[16*i]; 1931 alpha2 = x[16*i+1]; 1932 alpha3 = x[16*i+2]; 1933 alpha4 = x[16*i+3]; 1934 alpha5 = x[16*i+4]; 1935 alpha6 = x[16*i+5]; 1936 alpha7 = x[16*i+6]; 1937 alpha8 = x[16*i+7]; 1938 alpha9 = x[16*i+8]; 1939 alpha10 = x[16*i+9]; 1940 alpha11 = x[16*i+10]; 1941 alpha12 = x[16*i+11]; 1942 alpha13 = x[16*i+12]; 1943 alpha14 = x[16*i+13]; 1944 alpha15 = x[16*i+14]; 1945 alpha16 = x[16*i+15]; 1946 while (n-->0) { 1947 y[16*(*idx)] += alpha1*(*v); 1948 y[16*(*idx)+1] += alpha2*(*v); 1949 y[16*(*idx)+2] += alpha3*(*v); 1950 y[16*(*idx)+3] += alpha4*(*v); 1951 y[16*(*idx)+4] += alpha5*(*v); 1952 y[16*(*idx)+5] += alpha6*(*v); 1953 y[16*(*idx)+6] += alpha7*(*v); 1954 y[16*(*idx)+7] += alpha8*(*v); 1955 y[16*(*idx)+8] += alpha9*(*v); 1956 y[16*(*idx)+9] += alpha10*(*v); 1957 y[16*(*idx)+10] += alpha11*(*v); 1958 y[16*(*idx)+11] += alpha12*(*v); 1959 y[16*(*idx)+12] += alpha13*(*v); 1960 y[16*(*idx)+13] += alpha14*(*v); 1961 y[16*(*idx)+14] += alpha15*(*v); 1962 y[16*(*idx)+15] += alpha16*(*v); 1963 idx++; v++; 1964 } 1965 } 1966 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 1967 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1968 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1969 PetscFunctionReturn(0); 1970 } 1971 1972 #undef __FUNCT__ 1973 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1974 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1975 { 1976 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1977 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1978 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1979 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1980 PetscErrorCode ierr; 1981 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1982 PetscInt n,i,jrow,j; 1983 1984 PetscFunctionBegin; 1985 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1986 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1987 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1988 idx = a->j; 1989 v = a->a; 1990 ii = a->i; 1991 1992 for (i=0; i<m; i++) { 1993 jrow = ii[i]; 1994 n = ii[i+1] - jrow; 1995 sum1 = 0.0; 1996 sum2 = 0.0; 1997 sum3 = 0.0; 1998 sum4 = 0.0; 1999 sum5 = 0.0; 2000 sum6 = 0.0; 2001 sum7 = 0.0; 2002 sum8 = 0.0; 2003 sum9 = 0.0; 2004 sum10 = 0.0; 2005 sum11 = 0.0; 2006 sum12 = 0.0; 2007 sum13 = 0.0; 2008 sum14 = 0.0; 2009 sum15 = 0.0; 2010 sum16 = 0.0; 2011 for (j=0; j<n; j++) { 2012 sum1 += v[jrow]*x[16*idx[jrow]]; 2013 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2014 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2015 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2016 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2017 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2018 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2019 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2020 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2021 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2022 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2023 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2024 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2025 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2026 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2027 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2028 jrow++; 2029 } 2030 y[16*i] += sum1; 2031 y[16*i+1] += sum2; 2032 y[16*i+2] += sum3; 2033 y[16*i+3] += sum4; 2034 y[16*i+4] += sum5; 2035 y[16*i+5] += sum6; 2036 y[16*i+6] += sum7; 2037 y[16*i+7] += sum8; 2038 y[16*i+8] += sum9; 2039 y[16*i+9] += sum10; 2040 y[16*i+10] += sum11; 2041 y[16*i+11] += sum12; 2042 y[16*i+12] += sum13; 2043 y[16*i+13] += sum14; 2044 y[16*i+14] += sum15; 2045 y[16*i+15] += sum16; 2046 } 2047 2048 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2049 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2050 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 #undef __FUNCT__ 2055 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2056 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2057 { 2058 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2059 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2060 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2061 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2062 PetscErrorCode ierr; 2063 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2064 2065 PetscFunctionBegin; 2066 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2067 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2068 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2069 for (i=0; i<m; i++) { 2070 idx = a->j + a->i[i] ; 2071 v = a->a + a->i[i] ; 2072 n = a->i[i+1] - a->i[i]; 2073 alpha1 = x[16*i]; 2074 alpha2 = x[16*i+1]; 2075 alpha3 = x[16*i+2]; 2076 alpha4 = x[16*i+3]; 2077 alpha5 = x[16*i+4]; 2078 alpha6 = x[16*i+5]; 2079 alpha7 = x[16*i+6]; 2080 alpha8 = x[16*i+7]; 2081 alpha9 = x[16*i+8]; 2082 alpha10 = x[16*i+9]; 2083 alpha11 = x[16*i+10]; 2084 alpha12 = x[16*i+11]; 2085 alpha13 = x[16*i+12]; 2086 alpha14 = x[16*i+13]; 2087 alpha15 = x[16*i+14]; 2088 alpha16 = x[16*i+15]; 2089 while (n-->0) { 2090 y[16*(*idx)] += alpha1*(*v); 2091 y[16*(*idx)+1] += alpha2*(*v); 2092 y[16*(*idx)+2] += alpha3*(*v); 2093 y[16*(*idx)+3] += alpha4*(*v); 2094 y[16*(*idx)+4] += alpha5*(*v); 2095 y[16*(*idx)+5] += alpha6*(*v); 2096 y[16*(*idx)+6] += alpha7*(*v); 2097 y[16*(*idx)+7] += alpha8*(*v); 2098 y[16*(*idx)+8] += alpha9*(*v); 2099 y[16*(*idx)+9] += alpha10*(*v); 2100 y[16*(*idx)+10] += alpha11*(*v); 2101 y[16*(*idx)+11] += alpha12*(*v); 2102 y[16*(*idx)+12] += alpha13*(*v); 2103 y[16*(*idx)+13] += alpha14*(*v); 2104 y[16*(*idx)+14] += alpha15*(*v); 2105 y[16*(*idx)+15] += alpha16*(*v); 2106 idx++; v++; 2107 } 2108 } 2109 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2110 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2111 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 /*--------------------------------------------------------------------------------------------*/ 2116 #undef __FUNCT__ 2117 #define __FUNCT__ "MatMult_SeqMAIJ_18" 2118 PetscErrorCode MatMult_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2119 { 2120 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2121 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2122 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2123 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2124 PetscErrorCode ierr; 2125 PetscInt m = b->AIJ->rmap.n,nonzerorow=0,*idx,*ii; 2126 PetscInt n,i,jrow,j; 2127 2128 PetscFunctionBegin; 2129 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2130 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2131 idx = a->j; 2132 v = a->a; 2133 ii = a->i; 2134 2135 for (i=0; i<m; i++) { 2136 jrow = ii[i]; 2137 n = ii[i+1] - jrow; 2138 sum1 = 0.0; 2139 sum2 = 0.0; 2140 sum3 = 0.0; 2141 sum4 = 0.0; 2142 sum5 = 0.0; 2143 sum6 = 0.0; 2144 sum7 = 0.0; 2145 sum8 = 0.0; 2146 sum9 = 0.0; 2147 sum10 = 0.0; 2148 sum11 = 0.0; 2149 sum12 = 0.0; 2150 sum13 = 0.0; 2151 sum14 = 0.0; 2152 sum15 = 0.0; 2153 sum16 = 0.0; 2154 sum17 = 0.0; 2155 sum18 = 0.0; 2156 nonzerorow += (n>0); 2157 for (j=0; j<n; j++) { 2158 sum1 += v[jrow]*x[18*idx[jrow]]; 2159 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2160 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2161 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2162 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2163 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2164 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2165 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2166 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2167 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2168 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2169 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2170 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2171 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2172 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2173 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2174 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2175 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2176 jrow++; 2177 } 2178 y[18*i] = sum1; 2179 y[18*i+1] = sum2; 2180 y[18*i+2] = sum3; 2181 y[18*i+3] = sum4; 2182 y[18*i+4] = sum5; 2183 y[18*i+5] = sum6; 2184 y[18*i+6] = sum7; 2185 y[18*i+7] = sum8; 2186 y[18*i+8] = sum9; 2187 y[18*i+9] = sum10; 2188 y[18*i+10] = sum11; 2189 y[18*i+11] = sum12; 2190 y[18*i+12] = sum13; 2191 y[18*i+13] = sum14; 2192 y[18*i+14] = sum15; 2193 y[18*i+15] = sum16; 2194 y[18*i+16] = sum17; 2195 y[18*i+17] = sum18; 2196 } 2197 2198 ierr = PetscLogFlops(36*a->nz - 18*nonzerorow);CHKERRQ(ierr); 2199 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2200 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2201 PetscFunctionReturn(0); 2202 } 2203 2204 #undef __FUNCT__ 2205 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_18" 2206 PetscErrorCode MatMultTranspose_SeqMAIJ_18(Mat A,Vec xx,Vec yy) 2207 { 2208 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2209 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2210 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 2211 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2212 PetscErrorCode ierr; 2213 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2214 2215 PetscFunctionBegin; 2216 ierr = VecSet(yy,zero);CHKERRQ(ierr); 2217 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2218 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 2219 2220 for (i=0; i<m; i++) { 2221 idx = a->j + a->i[i] ; 2222 v = a->a + a->i[i] ; 2223 n = a->i[i+1] - a->i[i]; 2224 alpha1 = x[18*i]; 2225 alpha2 = x[18*i+1]; 2226 alpha3 = x[18*i+2]; 2227 alpha4 = x[18*i+3]; 2228 alpha5 = x[18*i+4]; 2229 alpha6 = x[18*i+5]; 2230 alpha7 = x[18*i+6]; 2231 alpha8 = x[18*i+7]; 2232 alpha9 = x[18*i+8]; 2233 alpha10 = x[18*i+9]; 2234 alpha11 = x[18*i+10]; 2235 alpha12 = x[18*i+11]; 2236 alpha13 = x[18*i+12]; 2237 alpha14 = x[18*i+13]; 2238 alpha15 = x[18*i+14]; 2239 alpha16 = x[18*i+15]; 2240 alpha17 = x[18*i+16]; 2241 alpha18 = x[18*i+17]; 2242 while (n-->0) { 2243 y[18*(*idx)] += alpha1*(*v); 2244 y[18*(*idx)+1] += alpha2*(*v); 2245 y[18*(*idx)+2] += alpha3*(*v); 2246 y[18*(*idx)+3] += alpha4*(*v); 2247 y[18*(*idx)+4] += alpha5*(*v); 2248 y[18*(*idx)+5] += alpha6*(*v); 2249 y[18*(*idx)+6] += alpha7*(*v); 2250 y[18*(*idx)+7] += alpha8*(*v); 2251 y[18*(*idx)+8] += alpha9*(*v); 2252 y[18*(*idx)+9] += alpha10*(*v); 2253 y[18*(*idx)+10] += alpha11*(*v); 2254 y[18*(*idx)+11] += alpha12*(*v); 2255 y[18*(*idx)+12] += alpha13*(*v); 2256 y[18*(*idx)+13] += alpha14*(*v); 2257 y[18*(*idx)+14] += alpha15*(*v); 2258 y[18*(*idx)+15] += alpha16*(*v); 2259 y[18*(*idx)+16] += alpha17*(*v); 2260 y[18*(*idx)+17] += alpha18*(*v); 2261 idx++; v++; 2262 } 2263 } 2264 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2265 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2266 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 2267 PetscFunctionReturn(0); 2268 } 2269 2270 #undef __FUNCT__ 2271 #define __FUNCT__ "MatMultAdd_SeqMAIJ_18" 2272 PetscErrorCode MatMultAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2273 { 2274 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2275 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2276 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 2277 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16, sum17, sum18; 2278 PetscErrorCode ierr; 2279 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 2280 PetscInt n,i,jrow,j; 2281 2282 PetscFunctionBegin; 2283 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2284 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2285 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2286 idx = a->j; 2287 v = a->a; 2288 ii = a->i; 2289 2290 for (i=0; i<m; i++) { 2291 jrow = ii[i]; 2292 n = ii[i+1] - jrow; 2293 sum1 = 0.0; 2294 sum2 = 0.0; 2295 sum3 = 0.0; 2296 sum4 = 0.0; 2297 sum5 = 0.0; 2298 sum6 = 0.0; 2299 sum7 = 0.0; 2300 sum8 = 0.0; 2301 sum9 = 0.0; 2302 sum10 = 0.0; 2303 sum11 = 0.0; 2304 sum12 = 0.0; 2305 sum13 = 0.0; 2306 sum14 = 0.0; 2307 sum15 = 0.0; 2308 sum16 = 0.0; 2309 sum17 = 0.0; 2310 sum18 = 0.0; 2311 for (j=0; j<n; j++) { 2312 sum1 += v[jrow]*x[18*idx[jrow]]; 2313 sum2 += v[jrow]*x[18*idx[jrow]+1]; 2314 sum3 += v[jrow]*x[18*idx[jrow]+2]; 2315 sum4 += v[jrow]*x[18*idx[jrow]+3]; 2316 sum5 += v[jrow]*x[18*idx[jrow]+4]; 2317 sum6 += v[jrow]*x[18*idx[jrow]+5]; 2318 sum7 += v[jrow]*x[18*idx[jrow]+6]; 2319 sum8 += v[jrow]*x[18*idx[jrow]+7]; 2320 sum9 += v[jrow]*x[18*idx[jrow]+8]; 2321 sum10 += v[jrow]*x[18*idx[jrow]+9]; 2322 sum11 += v[jrow]*x[18*idx[jrow]+10]; 2323 sum12 += v[jrow]*x[18*idx[jrow]+11]; 2324 sum13 += v[jrow]*x[18*idx[jrow]+12]; 2325 sum14 += v[jrow]*x[18*idx[jrow]+13]; 2326 sum15 += v[jrow]*x[18*idx[jrow]+14]; 2327 sum16 += v[jrow]*x[18*idx[jrow]+15]; 2328 sum17 += v[jrow]*x[18*idx[jrow]+16]; 2329 sum18 += v[jrow]*x[18*idx[jrow]+17]; 2330 jrow++; 2331 } 2332 y[18*i] += sum1; 2333 y[18*i+1] += sum2; 2334 y[18*i+2] += sum3; 2335 y[18*i+3] += sum4; 2336 y[18*i+4] += sum5; 2337 y[18*i+5] += sum6; 2338 y[18*i+6] += sum7; 2339 y[18*i+7] += sum8; 2340 y[18*i+8] += sum9; 2341 y[18*i+9] += sum10; 2342 y[18*i+10] += sum11; 2343 y[18*i+11] += sum12; 2344 y[18*i+12] += sum13; 2345 y[18*i+13] += sum14; 2346 y[18*i+14] += sum15; 2347 y[18*i+15] += sum16; 2348 y[18*i+16] += sum17; 2349 y[18*i+17] += sum18; 2350 } 2351 2352 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2353 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2354 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2355 PetscFunctionReturn(0); 2356 } 2357 2358 #undef __FUNCT__ 2359 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_18" 2360 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_18(Mat A,Vec xx,Vec yy,Vec zz) 2361 { 2362 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2363 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2364 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2365 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16,alpha17,alpha18; 2366 PetscErrorCode ierr; 2367 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2368 2369 PetscFunctionBegin; 2370 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2371 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2372 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2373 for (i=0; i<m; i++) { 2374 idx = a->j + a->i[i] ; 2375 v = a->a + a->i[i] ; 2376 n = a->i[i+1] - a->i[i]; 2377 alpha1 = x[18*i]; 2378 alpha2 = x[18*i+1]; 2379 alpha3 = x[18*i+2]; 2380 alpha4 = x[18*i+3]; 2381 alpha5 = x[18*i+4]; 2382 alpha6 = x[18*i+5]; 2383 alpha7 = x[18*i+6]; 2384 alpha8 = x[18*i+7]; 2385 alpha9 = x[18*i+8]; 2386 alpha10 = x[18*i+9]; 2387 alpha11 = x[18*i+10]; 2388 alpha12 = x[18*i+11]; 2389 alpha13 = x[18*i+12]; 2390 alpha14 = x[18*i+13]; 2391 alpha15 = x[18*i+14]; 2392 alpha16 = x[18*i+15]; 2393 alpha17 = x[18*i+16]; 2394 alpha18 = x[18*i+17]; 2395 while (n-->0) { 2396 y[18*(*idx)] += alpha1*(*v); 2397 y[18*(*idx)+1] += alpha2*(*v); 2398 y[18*(*idx)+2] += alpha3*(*v); 2399 y[18*(*idx)+3] += alpha4*(*v); 2400 y[18*(*idx)+4] += alpha5*(*v); 2401 y[18*(*idx)+5] += alpha6*(*v); 2402 y[18*(*idx)+6] += alpha7*(*v); 2403 y[18*(*idx)+7] += alpha8*(*v); 2404 y[18*(*idx)+8] += alpha9*(*v); 2405 y[18*(*idx)+9] += alpha10*(*v); 2406 y[18*(*idx)+10] += alpha11*(*v); 2407 y[18*(*idx)+11] += alpha12*(*v); 2408 y[18*(*idx)+12] += alpha13*(*v); 2409 y[18*(*idx)+13] += alpha14*(*v); 2410 y[18*(*idx)+14] += alpha15*(*v); 2411 y[18*(*idx)+15] += alpha16*(*v); 2412 y[18*(*idx)+16] += alpha17*(*v); 2413 y[18*(*idx)+17] += alpha18*(*v); 2414 idx++; v++; 2415 } 2416 } 2417 ierr = PetscLogFlops(36*a->nz);CHKERRQ(ierr); 2418 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2419 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 /*===================================================================================*/ 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2426 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2427 { 2428 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2429 PetscErrorCode ierr; 2430 2431 PetscFunctionBegin; 2432 /* start the scatter */ 2433 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2434 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2435 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2436 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #undef __FUNCT__ 2441 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2442 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2443 { 2444 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2445 PetscErrorCode ierr; 2446 2447 PetscFunctionBegin; 2448 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2449 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2450 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2451 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2452 PetscFunctionReturn(0); 2453 } 2454 2455 #undef __FUNCT__ 2456 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2457 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2458 { 2459 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 /* start the scatter */ 2464 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2465 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2466 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2467 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2468 PetscFunctionReturn(0); 2469 } 2470 2471 #undef __FUNCT__ 2472 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2473 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2474 { 2475 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2480 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2481 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2482 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 /* ----------------------------------------------------------------*/ 2487 #undef __FUNCT__ 2488 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2489 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2490 { 2491 /* This routine requires testing -- but it's getting better. */ 2492 PetscErrorCode ierr; 2493 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2494 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2495 Mat P=pp->AIJ; 2496 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2497 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2498 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2499 PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 2500 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2501 MatScalar *ca; 2502 2503 PetscFunctionBegin; 2504 /* Start timer */ 2505 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2506 2507 /* Get ij structure of P^T */ 2508 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2509 2510 cn = pn*ppdof; 2511 /* Allocate ci array, arrays for fill computation and */ 2512 /* free space for accumulating nonzero column info */ 2513 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2514 ci[0] = 0; 2515 2516 /* Work arrays for rows of P^T*A */ 2517 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2518 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2519 ptasparserow = ptadenserow + an; 2520 denserow = ptasparserow + an; 2521 sparserow = denserow + cn; 2522 2523 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2524 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2525 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2526 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2527 current_space = free_space; 2528 2529 /* Determine symbolic info for each row of C: */ 2530 for (i=0;i<pn;i++) { 2531 ptnzi = pti[i+1] - pti[i]; 2532 ptJ = ptj + pti[i]; 2533 for (dof=0;dof<ppdof;dof++) { 2534 ptanzi = 0; 2535 /* Determine symbolic row of PtA: */ 2536 for (j=0;j<ptnzi;j++) { 2537 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2538 arow = ptJ[j]*ppdof + dof; 2539 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2540 anzj = ai[arow+1] - ai[arow]; 2541 ajj = aj + ai[arow]; 2542 for (k=0;k<anzj;k++) { 2543 if (!ptadenserow[ajj[k]]) { 2544 ptadenserow[ajj[k]] = -1; 2545 ptasparserow[ptanzi++] = ajj[k]; 2546 } 2547 } 2548 } 2549 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2550 ptaj = ptasparserow; 2551 cnzi = 0; 2552 for (j=0;j<ptanzi;j++) { 2553 /* Get offset within block of P */ 2554 pshift = *ptaj%ppdof; 2555 /* Get block row of P */ 2556 prow = (*ptaj++)/ppdof; /* integer division */ 2557 /* P has same number of nonzeros per row as the compressed form */ 2558 pnzj = pi[prow+1] - pi[prow]; 2559 pjj = pj + pi[prow]; 2560 for (k=0;k<pnzj;k++) { 2561 /* Locations in C are shifted by the offset within the block */ 2562 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2563 if (!denserow[pjj[k]*ppdof+pshift]) { 2564 denserow[pjj[k]*ppdof+pshift] = -1; 2565 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2566 } 2567 } 2568 } 2569 2570 /* sort sparserow */ 2571 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2572 2573 /* If free space is not available, make more free space */ 2574 /* Double the amount of total space in the list */ 2575 if (current_space->local_remaining<cnzi) { 2576 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2577 } 2578 2579 /* Copy data into free space, and zero out denserows */ 2580 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2581 current_space->array += cnzi; 2582 current_space->local_used += cnzi; 2583 current_space->local_remaining -= cnzi; 2584 2585 for (j=0;j<ptanzi;j++) { 2586 ptadenserow[ptasparserow[j]] = 0; 2587 } 2588 for (j=0;j<cnzi;j++) { 2589 denserow[sparserow[j]] = 0; 2590 } 2591 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2592 /* For now, we will recompute what is needed. */ 2593 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2594 } 2595 } 2596 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2597 /* Allocate space for cj, initialize cj, and */ 2598 /* destroy list of free space and other temporary array(s) */ 2599 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2600 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2601 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2602 2603 /* Allocate space for ca */ 2604 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2605 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2606 2607 /* put together the new matrix */ 2608 ierr = MatCreateSeqAIJWithArrays(((PetscObject)A)->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2609 2610 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2611 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2612 c = (Mat_SeqAIJ *)((*C)->data); 2613 c->free_a = PETSC_TRUE; 2614 c->free_ij = PETSC_TRUE; 2615 c->nonew = 0; 2616 2617 /* Clean up. */ 2618 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2619 2620 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2621 PetscFunctionReturn(0); 2622 } 2623 2624 #undef __FUNCT__ 2625 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2626 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2627 { 2628 /* This routine requires testing -- first draft only */ 2629 PetscErrorCode ierr; 2630 PetscInt flops=0; 2631 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2632 Mat P=pp->AIJ; 2633 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2634 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2635 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2636 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2637 PetscInt *ci=c->i,*cj=c->j,*cjj; 2638 PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 2639 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2640 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2641 2642 PetscFunctionBegin; 2643 /* Allocate temporary array for storage of one row of A*P */ 2644 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2645 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2646 2647 apj = (PetscInt *)(apa + cn); 2648 apjdense = apj + cn; 2649 2650 /* Clear old values in C */ 2651 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2652 2653 for (i=0;i<am;i++) { 2654 /* Form sparse row of A*P */ 2655 anzi = ai[i+1] - ai[i]; 2656 apnzj = 0; 2657 for (j=0;j<anzi;j++) { 2658 /* Get offset within block of P */ 2659 pshift = *aj%ppdof; 2660 /* Get block row of P */ 2661 prow = *aj++/ppdof; /* integer division */ 2662 pnzj = pi[prow+1] - pi[prow]; 2663 pjj = pj + pi[prow]; 2664 paj = pa + pi[prow]; 2665 for (k=0;k<pnzj;k++) { 2666 poffset = pjj[k]*ppdof+pshift; 2667 if (!apjdense[poffset]) { 2668 apjdense[poffset] = -1; 2669 apj[apnzj++] = poffset; 2670 } 2671 apa[poffset] += (*aa)*paj[k]; 2672 } 2673 flops += 2*pnzj; 2674 aa++; 2675 } 2676 2677 /* Sort the j index array for quick sparse axpy. */ 2678 /* Note: a array does not need sorting as it is in dense storage locations. */ 2679 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2680 2681 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2682 prow = i/ppdof; /* integer division */ 2683 pshift = i%ppdof; 2684 poffset = pi[prow]; 2685 pnzi = pi[prow+1] - poffset; 2686 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2687 pJ = pj+poffset; 2688 pA = pa+poffset; 2689 for (j=0;j<pnzi;j++) { 2690 crow = (*pJ)*ppdof+pshift; 2691 cjj = cj + ci[crow]; 2692 caj = ca + ci[crow]; 2693 pJ++; 2694 /* Perform sparse axpy operation. Note cjj includes apj. */ 2695 for (k=0,nextap=0;nextap<apnzj;k++) { 2696 if (cjj[k]==apj[nextap]) { 2697 caj[k] += (*pA)*apa[apj[nextap++]]; 2698 } 2699 } 2700 flops += 2*apnzj; 2701 pA++; 2702 } 2703 2704 /* Zero the current row info for A*P */ 2705 for (j=0;j<apnzj;j++) { 2706 apa[apj[j]] = 0.; 2707 apjdense[apj[j]] = 0; 2708 } 2709 } 2710 2711 /* Assemble the final matrix and clean up */ 2712 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2713 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2714 ierr = PetscFree(apa);CHKERRQ(ierr); 2715 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2716 PetscFunctionReturn(0); 2717 } 2718 2719 #undef __FUNCT__ 2720 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2721 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2722 { 2723 PetscErrorCode ierr; 2724 2725 PetscFunctionBegin; 2726 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2727 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2728 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 #undef __FUNCT__ 2733 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2734 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2735 { 2736 PetscFunctionBegin; 2737 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 2738 PetscFunctionReturn(0); 2739 } 2740 2741 EXTERN_C_BEGIN 2742 #undef __FUNCT__ 2743 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2744 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2745 { 2746 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2747 Mat a = b->AIJ,B; 2748 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2749 PetscErrorCode ierr; 2750 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2751 PetscInt *cols; 2752 PetscScalar *vals; 2753 2754 PetscFunctionBegin; 2755 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2756 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2757 for (i=0; i<m; i++) { 2758 nmax = PetscMax(nmax,aij->ilen[i]); 2759 for (j=0; j<dof; j++) { 2760 ilen[dof*i+j] = aij->ilen[i]; 2761 } 2762 } 2763 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2764 ierr = PetscFree(ilen);CHKERRQ(ierr); 2765 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2766 ii = 0; 2767 for (i=0; i<m; i++) { 2768 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2769 for (j=0; j<dof; j++) { 2770 for (k=0; k<ncols; k++) { 2771 icols[k] = dof*cols[k]+j; 2772 } 2773 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2774 ii++; 2775 } 2776 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2777 } 2778 ierr = PetscFree(icols);CHKERRQ(ierr); 2779 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2780 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2781 2782 if (reuse == MAT_REUSE_MATRIX) { 2783 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2784 } else { 2785 *newmat = B; 2786 } 2787 PetscFunctionReturn(0); 2788 } 2789 EXTERN_C_END 2790 2791 #include "src/mat/impls/aij/mpi/mpiaij.h" 2792 2793 EXTERN_C_BEGIN 2794 #undef __FUNCT__ 2795 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2796 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2797 { 2798 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2799 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2800 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2801 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2802 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2803 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2804 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2805 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2806 PetscInt rstart,cstart,*garray,ii,k; 2807 PetscErrorCode ierr; 2808 PetscScalar *vals,*ovals; 2809 2810 PetscFunctionBegin; 2811 ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2812 for (i=0; i<A->rmap.n/dof; i++) { 2813 nmax = PetscMax(nmax,AIJ->ilen[i]); 2814 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2815 for (j=0; j<dof; j++) { 2816 dnz[dof*i+j] = AIJ->ilen[i]; 2817 onz[dof*i+j] = OAIJ->ilen[i]; 2818 } 2819 } 2820 ierr = MatCreateMPIAIJ(((PetscObject)A)->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2821 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2822 2823 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2824 rstart = dof*maij->A->rmap.rstart; 2825 cstart = dof*maij->A->cmap.rstart; 2826 garray = mpiaij->garray; 2827 2828 ii = rstart; 2829 for (i=0; i<A->rmap.n/dof; i++) { 2830 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2831 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2832 for (j=0; j<dof; j++) { 2833 for (k=0; k<ncols; k++) { 2834 icols[k] = cstart + dof*cols[k]+j; 2835 } 2836 for (k=0; k<oncols; k++) { 2837 oicols[k] = dof*garray[ocols[k]]+j; 2838 } 2839 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2840 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2841 ii++; 2842 } 2843 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2844 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2845 } 2846 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2847 2848 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2849 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2850 2851 if (reuse == MAT_REUSE_MATRIX) { 2852 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 2853 ((PetscObject)A)->refct = 1; 2854 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2855 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 2856 } else { 2857 *newmat = B; 2858 } 2859 PetscFunctionReturn(0); 2860 } 2861 EXTERN_C_END 2862 2863 2864 /* ---------------------------------------------------------------------------------- */ 2865 /*MC 2866 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2867 operations for multicomponent problems. It interpolates each component the same 2868 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2869 and MATMPIAIJ for distributed matrices. 2870 2871 Operations provided: 2872 + MatMult 2873 . MatMultTranspose 2874 . MatMultAdd 2875 . MatMultTransposeAdd 2876 - MatView 2877 2878 Level: advanced 2879 2880 M*/ 2881 #undef __FUNCT__ 2882 #define __FUNCT__ "MatCreateMAIJ" 2883 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2884 { 2885 PetscErrorCode ierr; 2886 PetscMPIInt size; 2887 PetscInt n; 2888 Mat_MPIMAIJ *b; 2889 Mat B; 2890 2891 PetscFunctionBegin; 2892 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2893 2894 if (dof == 1) { 2895 *maij = A; 2896 } else { 2897 ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 2898 ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2899 B->assembled = PETSC_TRUE; 2900 2901 ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr); 2902 if (size == 1) { 2903 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2904 B->ops->destroy = MatDestroy_SeqMAIJ; 2905 B->ops->view = MatView_SeqMAIJ; 2906 b = (Mat_MPIMAIJ*)B->data; 2907 b->dof = dof; 2908 b->AIJ = A; 2909 if (dof == 2) { 2910 B->ops->mult = MatMult_SeqMAIJ_2; 2911 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2912 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2913 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2914 } else if (dof == 3) { 2915 B->ops->mult = MatMult_SeqMAIJ_3; 2916 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2917 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2918 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2919 } else if (dof == 4) { 2920 B->ops->mult = MatMult_SeqMAIJ_4; 2921 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2922 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2923 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2924 } else if (dof == 5) { 2925 B->ops->mult = MatMult_SeqMAIJ_5; 2926 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2927 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2928 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2929 } else if (dof == 6) { 2930 B->ops->mult = MatMult_SeqMAIJ_6; 2931 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2932 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2933 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2934 } else if (dof == 7) { 2935 B->ops->mult = MatMult_SeqMAIJ_7; 2936 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2937 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2938 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2939 } else if (dof == 8) { 2940 B->ops->mult = MatMult_SeqMAIJ_8; 2941 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2942 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2943 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2944 } else if (dof == 9) { 2945 B->ops->mult = MatMult_SeqMAIJ_9; 2946 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2947 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2948 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2949 } else if (dof == 10) { 2950 B->ops->mult = MatMult_SeqMAIJ_10; 2951 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2952 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2953 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2954 } else if (dof == 16) { 2955 B->ops->mult = MatMult_SeqMAIJ_16; 2956 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2957 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2958 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2959 } else if (dof == 18) { 2960 B->ops->mult = MatMult_SeqMAIJ_18; 2961 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 2962 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 2963 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 2964 } else { 2965 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2966 } 2967 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2968 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2969 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2970 } else { 2971 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2972 IS from,to; 2973 Vec gvec; 2974 PetscInt *garray,i; 2975 2976 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2977 B->ops->destroy = MatDestroy_MPIMAIJ; 2978 B->ops->view = MatView_MPIMAIJ; 2979 b = (Mat_MPIMAIJ*)B->data; 2980 b->dof = dof; 2981 b->A = A; 2982 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2983 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2984 2985 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2986 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2987 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2988 2989 /* create two temporary Index sets for build scatter gather */ 2990 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2991 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2992 ierr = ISCreateBlock(((PetscObject)A)->comm,dof,n,garray,&from);CHKERRQ(ierr); 2993 ierr = PetscFree(garray);CHKERRQ(ierr); 2994 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2995 2996 /* create temporary global vector to generate scatter context */ 2997 ierr = VecCreateMPIWithArray(((PetscObject)A)->comm,dof*A->cmap.n,dof*A->cmap.N,PETSC_NULL,&gvec);CHKERRQ(ierr); 2998 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2999 3000 /* generate the scatter context */ 3001 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 3002 3003 ierr = ISDestroy(from);CHKERRQ(ierr); 3004 ierr = ISDestroy(to);CHKERRQ(ierr); 3005 ierr = VecDestroy(gvec);CHKERRQ(ierr); 3006 3007 B->ops->mult = MatMult_MPIMAIJ_dof; 3008 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 3009 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 3010 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 3011 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 3012 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 3013 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 3014 } 3015 *maij = B; 3016 ierr = MatView_Private(B);CHKERRQ(ierr); 3017 } 3018 PetscFunctionReturn(0); 3019 } 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032