1 #define PETSCMAT_DLL 2 3 /* 4 Defines the basic matrix operations for the MAIJ matrix storage format. 5 This format is used for restriction and interpolation operations for 6 multicomponent problems. It interpolates each component the same way 7 independently. 8 9 We provide: 10 MatMult() 11 MatMultTranspose() 12 MatMultTransposeAdd() 13 MatMultAdd() 14 and 15 MatCreateMAIJ(Mat,dof,Mat*) 16 17 This single directory handles both the sequential and parallel codes 18 */ 19 20 #include "src/mat/impls/maij/maij.h" 21 #include "src/mat/utils/freespace.h" 22 #include "private/vecimpl.h" 23 24 #undef __FUNCT__ 25 #define __FUNCT__ "MatMAIJGetAIJ" 26 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B) 27 { 28 PetscErrorCode ierr; 29 PetscTruth ismpimaij,isseqmaij; 30 31 PetscFunctionBegin; 32 ierr = PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);CHKERRQ(ierr); 33 ierr = PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);CHKERRQ(ierr); 34 if (ismpimaij) { 35 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 36 37 *B = b->A; 38 } else if (isseqmaij) { 39 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 40 41 *B = b->AIJ; 42 } else { 43 *B = A; 44 } 45 PetscFunctionReturn(0); 46 } 47 48 #undef __FUNCT__ 49 #define __FUNCT__ "MatMAIJRedimension" 50 PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B) 51 { 52 PetscErrorCode ierr; 53 Mat Aij; 54 55 PetscFunctionBegin; 56 ierr = MatMAIJGetAIJ(A,&Aij);CHKERRQ(ierr); 57 ierr = MatCreateMAIJ(Aij,dof,B);CHKERRQ(ierr); 58 PetscFunctionReturn(0); 59 } 60 61 #undef __FUNCT__ 62 #define __FUNCT__ "MatDestroy_SeqMAIJ" 63 PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 64 { 65 PetscErrorCode ierr; 66 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 67 68 PetscFunctionBegin; 69 if (b->AIJ) { 70 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 71 } 72 ierr = PetscFree(b);CHKERRQ(ierr); 73 PetscFunctionReturn(0); 74 } 75 76 #undef __FUNCT__ 77 #define __FUNCT__ "MatView_SeqMAIJ" 78 PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer) 79 { 80 PetscErrorCode ierr; 81 Mat B; 82 83 PetscFunctionBegin; 84 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B); 85 ierr = MatView(B,viewer);CHKERRQ(ierr); 86 ierr = MatDestroy(B);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatView_MPIMAIJ" 92 PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer) 93 { 94 PetscErrorCode ierr; 95 Mat B; 96 97 PetscFunctionBegin; 98 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B); 99 ierr = MatView(B,viewer);CHKERRQ(ierr); 100 ierr = MatDestroy(B);CHKERRQ(ierr); 101 PetscFunctionReturn(0); 102 } 103 104 #undef __FUNCT__ 105 #define __FUNCT__ "MatDestroy_MPIMAIJ" 106 PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 107 { 108 PetscErrorCode ierr; 109 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 110 111 PetscFunctionBegin; 112 if (b->AIJ) { 113 ierr = MatDestroy(b->AIJ);CHKERRQ(ierr); 114 } 115 if (b->OAIJ) { 116 ierr = MatDestroy(b->OAIJ);CHKERRQ(ierr); 117 } 118 if (b->A) { 119 ierr = MatDestroy(b->A);CHKERRQ(ierr); 120 } 121 if (b->ctx) { 122 ierr = VecScatterDestroy(b->ctx);CHKERRQ(ierr); 123 } 124 if (b->w) { 125 ierr = VecDestroy(b->w);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(b);CHKERRQ(ierr); 128 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 129 PetscFunctionReturn(0); 130 } 131 132 /*MC 133 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 134 multicomponent problems, interpolating or restricting each component the same way independently. 135 The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices. 136 137 Operations provided: 138 . MatMult 139 . MatMultTranspose 140 . MatMultAdd 141 . MatMultTransposeAdd 142 143 Level: advanced 144 145 .seealso: MatCreateSeqDense 146 M*/ 147 148 EXTERN_C_BEGIN 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatCreate_MAIJ" 151 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A) 152 { 153 PetscErrorCode ierr; 154 Mat_MPIMAIJ *b; 155 PetscMPIInt size; 156 157 PetscFunctionBegin; 158 ierr = PetscNewLog(A,Mat_MPIMAIJ,&b);CHKERRQ(ierr); 159 A->data = (void*)b; 160 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 161 A->factor = 0; 162 A->mapping = 0; 163 164 b->AIJ = 0; 165 b->dof = 0; 166 b->OAIJ = 0; 167 b->ctx = 0; 168 b->w = 0; 169 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 170 if (size == 1){ 171 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQMAIJ);CHKERRQ(ierr); 172 } else { 173 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIMAIJ);CHKERRQ(ierr); 174 } 175 PetscFunctionReturn(0); 176 } 177 EXTERN_C_END 178 179 /* --------------------------------------------------------------------------------------*/ 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_SeqMAIJ_2" 182 PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 183 { 184 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 185 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 186 PetscScalar *x,*y,*v,sum1, sum2; 187 PetscErrorCode ierr; 188 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 189 PetscInt n,i,jrow,j; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 193 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 194 idx = a->j; 195 v = a->a; 196 ii = a->i; 197 198 for (i=0; i<m; i++) { 199 jrow = ii[i]; 200 n = ii[i+1] - jrow; 201 sum1 = 0.0; 202 sum2 = 0.0; 203 for (j=0; j<n; j++) { 204 sum1 += v[jrow]*x[2*idx[jrow]]; 205 sum2 += v[jrow]*x[2*idx[jrow]+1]; 206 jrow++; 207 } 208 y[2*i] = sum1; 209 y[2*i+1] = sum2; 210 } 211 212 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 213 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 214 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_2" 220 PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy) 221 { 222 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 223 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 224 PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0; 225 PetscErrorCode ierr; 226 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 227 228 PetscFunctionBegin; 229 ierr = VecSet(yy,zero);CHKERRQ(ierr); 230 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 231 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 232 233 for (i=0; i<m; i++) { 234 idx = a->j + a->i[i] ; 235 v = a->a + a->i[i] ; 236 n = a->i[i+1] - a->i[i]; 237 alpha1 = x[2*i]; 238 alpha2 = x[2*i+1]; 239 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 240 } 241 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 242 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 243 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 244 PetscFunctionReturn(0); 245 } 246 247 #undef __FUNCT__ 248 #define __FUNCT__ "MatMultAdd_SeqMAIJ_2" 249 PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 250 { 251 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 252 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 253 PetscScalar *x,*y,*v,sum1, sum2; 254 PetscErrorCode ierr; 255 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 256 PetscInt n,i,jrow,j; 257 258 PetscFunctionBegin; 259 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 260 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 261 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 262 idx = a->j; 263 v = a->a; 264 ii = a->i; 265 266 for (i=0; i<m; i++) { 267 jrow = ii[i]; 268 n = ii[i+1] - jrow; 269 sum1 = 0.0; 270 sum2 = 0.0; 271 for (j=0; j<n; j++) { 272 sum1 += v[jrow]*x[2*idx[jrow]]; 273 sum2 += v[jrow]*x[2*idx[jrow]+1]; 274 jrow++; 275 } 276 y[2*i] += sum1; 277 y[2*i+1] += sum2; 278 } 279 280 ierr = PetscLogFlops(4*a->nz - 2*m);CHKERRQ(ierr); 281 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 282 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 283 PetscFunctionReturn(0); 284 } 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_2" 287 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz) 288 { 289 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 290 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 291 PetscScalar *x,*y,*v,alpha1,alpha2; 292 PetscErrorCode ierr; 293 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 294 295 PetscFunctionBegin; 296 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 297 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 298 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 299 300 for (i=0; i<m; i++) { 301 idx = a->j + a->i[i] ; 302 v = a->a + a->i[i] ; 303 n = a->i[i+1] - a->i[i]; 304 alpha1 = x[2*i]; 305 alpha2 = x[2*i+1]; 306 while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;} 307 } 308 ierr = PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);CHKERRQ(ierr); 309 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 310 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 /* --------------------------------------------------------------------------------------*/ 314 #undef __FUNCT__ 315 #define __FUNCT__ "MatMult_SeqMAIJ_3" 316 PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 317 { 318 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 319 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 320 PetscScalar *x,*y,*v,sum1, sum2, sum3; 321 PetscErrorCode ierr; 322 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 323 PetscInt n,i,jrow,j; 324 325 PetscFunctionBegin; 326 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 327 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 328 idx = a->j; 329 v = a->a; 330 ii = a->i; 331 332 for (i=0; i<m; i++) { 333 jrow = ii[i]; 334 n = ii[i+1] - jrow; 335 sum1 = 0.0; 336 sum2 = 0.0; 337 sum3 = 0.0; 338 for (j=0; j<n; j++) { 339 sum1 += v[jrow]*x[3*idx[jrow]]; 340 sum2 += v[jrow]*x[3*idx[jrow]+1]; 341 sum3 += v[jrow]*x[3*idx[jrow]+2]; 342 jrow++; 343 } 344 y[3*i] = sum1; 345 y[3*i+1] = sum2; 346 y[3*i+2] = sum3; 347 } 348 349 ierr = PetscLogFlops(6*a->nz - 3*m);CHKERRQ(ierr); 350 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 351 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 352 PetscFunctionReturn(0); 353 } 354 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_3" 357 PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy) 358 { 359 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 360 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 361 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0; 362 PetscErrorCode ierr; 363 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 364 365 PetscFunctionBegin; 366 ierr = VecSet(yy,zero);CHKERRQ(ierr); 367 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 368 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 369 370 for (i=0; i<m; i++) { 371 idx = a->j + a->i[i]; 372 v = a->a + a->i[i]; 373 n = a->i[i+1] - a->i[i]; 374 alpha1 = x[3*i]; 375 alpha2 = x[3*i+1]; 376 alpha3 = x[3*i+2]; 377 while (n-->0) { 378 y[3*(*idx)] += alpha1*(*v); 379 y[3*(*idx)+1] += alpha2*(*v); 380 y[3*(*idx)+2] += alpha3*(*v); 381 idx++; v++; 382 } 383 } 384 ierr = PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);CHKERRQ(ierr); 385 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 386 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 387 PetscFunctionReturn(0); 388 } 389 390 #undef __FUNCT__ 391 #define __FUNCT__ "MatMultAdd_SeqMAIJ_3" 392 PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 393 { 394 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 395 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 396 PetscScalar *x,*y,*v,sum1, sum2, sum3; 397 PetscErrorCode ierr; 398 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 399 PetscInt n,i,jrow,j; 400 401 PetscFunctionBegin; 402 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 403 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 404 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 405 idx = a->j; 406 v = a->a; 407 ii = a->i; 408 409 for (i=0; i<m; i++) { 410 jrow = ii[i]; 411 n = ii[i+1] - jrow; 412 sum1 = 0.0; 413 sum2 = 0.0; 414 sum3 = 0.0; 415 for (j=0; j<n; j++) { 416 sum1 += v[jrow]*x[3*idx[jrow]]; 417 sum2 += v[jrow]*x[3*idx[jrow]+1]; 418 sum3 += v[jrow]*x[3*idx[jrow]+2]; 419 jrow++; 420 } 421 y[3*i] += sum1; 422 y[3*i+1] += sum2; 423 y[3*i+2] += sum3; 424 } 425 426 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 427 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 428 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 #undef __FUNCT__ 432 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_3" 433 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz) 434 { 435 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 436 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 437 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3; 438 PetscErrorCode ierr; 439 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 440 441 PetscFunctionBegin; 442 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 443 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 444 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 445 for (i=0; i<m; i++) { 446 idx = a->j + a->i[i] ; 447 v = a->a + a->i[i] ; 448 n = a->i[i+1] - a->i[i]; 449 alpha1 = x[3*i]; 450 alpha2 = x[3*i+1]; 451 alpha3 = x[3*i+2]; 452 while (n-->0) { 453 y[3*(*idx)] += alpha1*(*v); 454 y[3*(*idx)+1] += alpha2*(*v); 455 y[3*(*idx)+2] += alpha3*(*v); 456 idx++; v++; 457 } 458 } 459 ierr = PetscLogFlops(6*a->nz);CHKERRQ(ierr); 460 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 461 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 /* ------------------------------------------------------------------------------*/ 466 #undef __FUNCT__ 467 #define __FUNCT__ "MatMult_SeqMAIJ_4" 468 PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 469 { 470 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 471 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 472 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 473 PetscErrorCode ierr; 474 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 475 PetscInt n,i,jrow,j; 476 477 PetscFunctionBegin; 478 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 479 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 480 idx = a->j; 481 v = a->a; 482 ii = a->i; 483 484 for (i=0; i<m; i++) { 485 jrow = ii[i]; 486 n = ii[i+1] - jrow; 487 sum1 = 0.0; 488 sum2 = 0.0; 489 sum3 = 0.0; 490 sum4 = 0.0; 491 for (j=0; j<n; j++) { 492 sum1 += v[jrow]*x[4*idx[jrow]]; 493 sum2 += v[jrow]*x[4*idx[jrow]+1]; 494 sum3 += v[jrow]*x[4*idx[jrow]+2]; 495 sum4 += v[jrow]*x[4*idx[jrow]+3]; 496 jrow++; 497 } 498 y[4*i] = sum1; 499 y[4*i+1] = sum2; 500 y[4*i+2] = sum3; 501 y[4*i+3] = sum4; 502 } 503 504 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 505 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 506 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 507 PetscFunctionReturn(0); 508 } 509 510 #undef __FUNCT__ 511 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_4" 512 PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy) 513 { 514 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 515 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 516 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0; 517 PetscErrorCode ierr; 518 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 519 520 PetscFunctionBegin; 521 ierr = VecSet(yy,zero);CHKERRQ(ierr); 522 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 523 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 524 for (i=0; i<m; i++) { 525 idx = a->j + a->i[i] ; 526 v = a->a + a->i[i] ; 527 n = a->i[i+1] - a->i[i]; 528 alpha1 = x[4*i]; 529 alpha2 = x[4*i+1]; 530 alpha3 = x[4*i+2]; 531 alpha4 = x[4*i+3]; 532 while (n-->0) { 533 y[4*(*idx)] += alpha1*(*v); 534 y[4*(*idx)+1] += alpha2*(*v); 535 y[4*(*idx)+2] += alpha3*(*v); 536 y[4*(*idx)+3] += alpha4*(*v); 537 idx++; v++; 538 } 539 } 540 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 541 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 542 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 543 PetscFunctionReturn(0); 544 } 545 546 #undef __FUNCT__ 547 #define __FUNCT__ "MatMultAdd_SeqMAIJ_4" 548 PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 549 { 550 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 551 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 552 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4; 553 PetscErrorCode ierr; 554 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 555 PetscInt n,i,jrow,j; 556 557 PetscFunctionBegin; 558 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 559 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 560 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 561 idx = a->j; 562 v = a->a; 563 ii = a->i; 564 565 for (i=0; i<m; i++) { 566 jrow = ii[i]; 567 n = ii[i+1] - jrow; 568 sum1 = 0.0; 569 sum2 = 0.0; 570 sum3 = 0.0; 571 sum4 = 0.0; 572 for (j=0; j<n; j++) { 573 sum1 += v[jrow]*x[4*idx[jrow]]; 574 sum2 += v[jrow]*x[4*idx[jrow]+1]; 575 sum3 += v[jrow]*x[4*idx[jrow]+2]; 576 sum4 += v[jrow]*x[4*idx[jrow]+3]; 577 jrow++; 578 } 579 y[4*i] += sum1; 580 y[4*i+1] += sum2; 581 y[4*i+2] += sum3; 582 y[4*i+3] += sum4; 583 } 584 585 ierr = PetscLogFlops(8*a->nz - 4*m);CHKERRQ(ierr); 586 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 587 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 588 PetscFunctionReturn(0); 589 } 590 #undef __FUNCT__ 591 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_4" 592 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz) 593 { 594 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 595 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 596 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4; 597 PetscErrorCode ierr; 598 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 599 600 PetscFunctionBegin; 601 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 602 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 603 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 604 605 for (i=0; i<m; i++) { 606 idx = a->j + a->i[i] ; 607 v = a->a + a->i[i] ; 608 n = a->i[i+1] - a->i[i]; 609 alpha1 = x[4*i]; 610 alpha2 = x[4*i+1]; 611 alpha3 = x[4*i+2]; 612 alpha4 = x[4*i+3]; 613 while (n-->0) { 614 y[4*(*idx)] += alpha1*(*v); 615 y[4*(*idx)+1] += alpha2*(*v); 616 y[4*(*idx)+2] += alpha3*(*v); 617 y[4*(*idx)+3] += alpha4*(*v); 618 idx++; v++; 619 } 620 } 621 ierr = PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);CHKERRQ(ierr); 622 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 623 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 /* ------------------------------------------------------------------------------*/ 627 628 #undef __FUNCT__ 629 #define __FUNCT__ "MatMult_SeqMAIJ_5" 630 PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 631 { 632 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 634 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 635 PetscErrorCode ierr; 636 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 637 PetscInt n,i,jrow,j; 638 639 PetscFunctionBegin; 640 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 641 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 642 idx = a->j; 643 v = a->a; 644 ii = a->i; 645 646 for (i=0; i<m; i++) { 647 jrow = ii[i]; 648 n = ii[i+1] - jrow; 649 sum1 = 0.0; 650 sum2 = 0.0; 651 sum3 = 0.0; 652 sum4 = 0.0; 653 sum5 = 0.0; 654 for (j=0; j<n; j++) { 655 sum1 += v[jrow]*x[5*idx[jrow]]; 656 sum2 += v[jrow]*x[5*idx[jrow]+1]; 657 sum3 += v[jrow]*x[5*idx[jrow]+2]; 658 sum4 += v[jrow]*x[5*idx[jrow]+3]; 659 sum5 += v[jrow]*x[5*idx[jrow]+4]; 660 jrow++; 661 } 662 y[5*i] = sum1; 663 y[5*i+1] = sum2; 664 y[5*i+2] = sum3; 665 y[5*i+3] = sum4; 666 y[5*i+4] = sum5; 667 } 668 669 ierr = PetscLogFlops(10*a->nz - 5*m);CHKERRQ(ierr); 670 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 671 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 672 PetscFunctionReturn(0); 673 } 674 675 #undef __FUNCT__ 676 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_5" 677 PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy) 678 { 679 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 680 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 681 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0; 682 PetscErrorCode ierr; 683 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 684 685 PetscFunctionBegin; 686 ierr = VecSet(yy,zero);CHKERRQ(ierr); 687 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 688 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 689 690 for (i=0; i<m; i++) { 691 idx = a->j + a->i[i] ; 692 v = a->a + a->i[i] ; 693 n = a->i[i+1] - a->i[i]; 694 alpha1 = x[5*i]; 695 alpha2 = x[5*i+1]; 696 alpha3 = x[5*i+2]; 697 alpha4 = x[5*i+3]; 698 alpha5 = x[5*i+4]; 699 while (n-->0) { 700 y[5*(*idx)] += alpha1*(*v); 701 y[5*(*idx)+1] += alpha2*(*v); 702 y[5*(*idx)+2] += alpha3*(*v); 703 y[5*(*idx)+3] += alpha4*(*v); 704 y[5*(*idx)+4] += alpha5*(*v); 705 idx++; v++; 706 } 707 } 708 ierr = PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);CHKERRQ(ierr); 709 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 710 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #undef __FUNCT__ 715 #define __FUNCT__ "MatMultAdd_SeqMAIJ_5" 716 PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 717 { 718 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 720 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5; 721 PetscErrorCode ierr; 722 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 723 PetscInt n,i,jrow,j; 724 725 PetscFunctionBegin; 726 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 727 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 728 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 729 idx = a->j; 730 v = a->a; 731 ii = a->i; 732 733 for (i=0; i<m; i++) { 734 jrow = ii[i]; 735 n = ii[i+1] - jrow; 736 sum1 = 0.0; 737 sum2 = 0.0; 738 sum3 = 0.0; 739 sum4 = 0.0; 740 sum5 = 0.0; 741 for (j=0; j<n; j++) { 742 sum1 += v[jrow]*x[5*idx[jrow]]; 743 sum2 += v[jrow]*x[5*idx[jrow]+1]; 744 sum3 += v[jrow]*x[5*idx[jrow]+2]; 745 sum4 += v[jrow]*x[5*idx[jrow]+3]; 746 sum5 += v[jrow]*x[5*idx[jrow]+4]; 747 jrow++; 748 } 749 y[5*i] += sum1; 750 y[5*i+1] += sum2; 751 y[5*i+2] += sum3; 752 y[5*i+3] += sum4; 753 y[5*i+4] += sum5; 754 } 755 756 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 757 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 758 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 759 PetscFunctionReturn(0); 760 } 761 762 #undef __FUNCT__ 763 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_5" 764 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz) 765 { 766 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 767 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 768 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5; 769 PetscErrorCode ierr; 770 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 771 772 PetscFunctionBegin; 773 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 774 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 775 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 776 777 for (i=0; i<m; i++) { 778 idx = a->j + a->i[i] ; 779 v = a->a + a->i[i] ; 780 n = a->i[i+1] - a->i[i]; 781 alpha1 = x[5*i]; 782 alpha2 = x[5*i+1]; 783 alpha3 = x[5*i+2]; 784 alpha4 = x[5*i+3]; 785 alpha5 = x[5*i+4]; 786 while (n-->0) { 787 y[5*(*idx)] += alpha1*(*v); 788 y[5*(*idx)+1] += alpha2*(*v); 789 y[5*(*idx)+2] += alpha3*(*v); 790 y[5*(*idx)+3] += alpha4*(*v); 791 y[5*(*idx)+4] += alpha5*(*v); 792 idx++; v++; 793 } 794 } 795 ierr = PetscLogFlops(10*a->nz);CHKERRQ(ierr); 796 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 797 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 798 PetscFunctionReturn(0); 799 } 800 801 /* ------------------------------------------------------------------------------*/ 802 #undef __FUNCT__ 803 #define __FUNCT__ "MatMult_SeqMAIJ_6" 804 PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 805 { 806 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 807 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 808 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 809 PetscErrorCode ierr; 810 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 811 PetscInt n,i,jrow,j; 812 813 PetscFunctionBegin; 814 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 815 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 816 idx = a->j; 817 v = a->a; 818 ii = a->i; 819 820 for (i=0; i<m; i++) { 821 jrow = ii[i]; 822 n = ii[i+1] - jrow; 823 sum1 = 0.0; 824 sum2 = 0.0; 825 sum3 = 0.0; 826 sum4 = 0.0; 827 sum5 = 0.0; 828 sum6 = 0.0; 829 for (j=0; j<n; j++) { 830 sum1 += v[jrow]*x[6*idx[jrow]]; 831 sum2 += v[jrow]*x[6*idx[jrow]+1]; 832 sum3 += v[jrow]*x[6*idx[jrow]+2]; 833 sum4 += v[jrow]*x[6*idx[jrow]+3]; 834 sum5 += v[jrow]*x[6*idx[jrow]+4]; 835 sum6 += v[jrow]*x[6*idx[jrow]+5]; 836 jrow++; 837 } 838 y[6*i] = sum1; 839 y[6*i+1] = sum2; 840 y[6*i+2] = sum3; 841 y[6*i+3] = sum4; 842 y[6*i+4] = sum5; 843 y[6*i+5] = sum6; 844 } 845 846 ierr = PetscLogFlops(12*a->nz - 6*m);CHKERRQ(ierr); 847 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 848 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 849 PetscFunctionReturn(0); 850 } 851 852 #undef __FUNCT__ 853 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_6" 854 PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy) 855 { 856 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 857 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 858 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0; 859 PetscErrorCode ierr; 860 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 861 862 PetscFunctionBegin; 863 ierr = VecSet(yy,zero);CHKERRQ(ierr); 864 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 865 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 866 867 for (i=0; i<m; i++) { 868 idx = a->j + a->i[i] ; 869 v = a->a + a->i[i] ; 870 n = a->i[i+1] - a->i[i]; 871 alpha1 = x[6*i]; 872 alpha2 = x[6*i+1]; 873 alpha3 = x[6*i+2]; 874 alpha4 = x[6*i+3]; 875 alpha5 = x[6*i+4]; 876 alpha6 = x[6*i+5]; 877 while (n-->0) { 878 y[6*(*idx)] += alpha1*(*v); 879 y[6*(*idx)+1] += alpha2*(*v); 880 y[6*(*idx)+2] += alpha3*(*v); 881 y[6*(*idx)+3] += alpha4*(*v); 882 y[6*(*idx)+4] += alpha5*(*v); 883 y[6*(*idx)+5] += alpha6*(*v); 884 idx++; v++; 885 } 886 } 887 ierr = PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);CHKERRQ(ierr); 888 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 889 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 890 PetscFunctionReturn(0); 891 } 892 893 #undef __FUNCT__ 894 #define __FUNCT__ "MatMultAdd_SeqMAIJ_6" 895 PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 896 { 897 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 898 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 899 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6; 900 PetscErrorCode ierr; 901 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 902 PetscInt n,i,jrow,j; 903 904 PetscFunctionBegin; 905 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 906 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 907 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 908 idx = a->j; 909 v = a->a; 910 ii = a->i; 911 912 for (i=0; i<m; i++) { 913 jrow = ii[i]; 914 n = ii[i+1] - jrow; 915 sum1 = 0.0; 916 sum2 = 0.0; 917 sum3 = 0.0; 918 sum4 = 0.0; 919 sum5 = 0.0; 920 sum6 = 0.0; 921 for (j=0; j<n; j++) { 922 sum1 += v[jrow]*x[6*idx[jrow]]; 923 sum2 += v[jrow]*x[6*idx[jrow]+1]; 924 sum3 += v[jrow]*x[6*idx[jrow]+2]; 925 sum4 += v[jrow]*x[6*idx[jrow]+3]; 926 sum5 += v[jrow]*x[6*idx[jrow]+4]; 927 sum6 += v[jrow]*x[6*idx[jrow]+5]; 928 jrow++; 929 } 930 y[6*i] += sum1; 931 y[6*i+1] += sum2; 932 y[6*i+2] += sum3; 933 y[6*i+3] += sum4; 934 y[6*i+4] += sum5; 935 y[6*i+5] += sum6; 936 } 937 938 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 939 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 940 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 941 PetscFunctionReturn(0); 942 } 943 944 #undef __FUNCT__ 945 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_6" 946 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz) 947 { 948 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 949 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 950 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6; 951 PetscErrorCode ierr; 952 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 953 954 PetscFunctionBegin; 955 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 956 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 957 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 958 959 for (i=0; i<m; i++) { 960 idx = a->j + a->i[i] ; 961 v = a->a + a->i[i] ; 962 n = a->i[i+1] - a->i[i]; 963 alpha1 = x[6*i]; 964 alpha2 = x[6*i+1]; 965 alpha3 = x[6*i+2]; 966 alpha4 = x[6*i+3]; 967 alpha5 = x[6*i+4]; 968 alpha6 = x[6*i+5]; 969 while (n-->0) { 970 y[6*(*idx)] += alpha1*(*v); 971 y[6*(*idx)+1] += alpha2*(*v); 972 y[6*(*idx)+2] += alpha3*(*v); 973 y[6*(*idx)+3] += alpha4*(*v); 974 y[6*(*idx)+4] += alpha5*(*v); 975 y[6*(*idx)+5] += alpha6*(*v); 976 idx++; v++; 977 } 978 } 979 ierr = PetscLogFlops(12*a->nz);CHKERRQ(ierr); 980 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 981 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 982 PetscFunctionReturn(0); 983 } 984 985 /* ------------------------------------------------------------------------------*/ 986 #undef __FUNCT__ 987 #define __FUNCT__ "MatMult_SeqMAIJ_7" 988 PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 989 { 990 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 991 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 992 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 993 PetscErrorCode ierr; 994 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 995 PetscInt n,i,jrow,j; 996 997 PetscFunctionBegin; 998 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 999 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1000 idx = a->j; 1001 v = a->a; 1002 ii = a->i; 1003 1004 for (i=0; i<m; i++) { 1005 jrow = ii[i]; 1006 n = ii[i+1] - jrow; 1007 sum1 = 0.0; 1008 sum2 = 0.0; 1009 sum3 = 0.0; 1010 sum4 = 0.0; 1011 sum5 = 0.0; 1012 sum6 = 0.0; 1013 sum7 = 0.0; 1014 for (j=0; j<n; j++) { 1015 sum1 += v[jrow]*x[7*idx[jrow]]; 1016 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1017 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1018 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1019 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1020 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1021 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1022 jrow++; 1023 } 1024 y[7*i] = sum1; 1025 y[7*i+1] = sum2; 1026 y[7*i+2] = sum3; 1027 y[7*i+3] = sum4; 1028 y[7*i+4] = sum5; 1029 y[7*i+5] = sum6; 1030 y[7*i+6] = sum7; 1031 } 1032 1033 ierr = PetscLogFlops(14*a->nz - 7*m);CHKERRQ(ierr); 1034 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1035 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1036 PetscFunctionReturn(0); 1037 } 1038 1039 #undef __FUNCT__ 1040 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_7" 1041 PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy) 1042 { 1043 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1044 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1045 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0; 1046 PetscErrorCode ierr; 1047 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1048 1049 PetscFunctionBegin; 1050 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1051 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1052 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1053 1054 for (i=0; i<m; i++) { 1055 idx = a->j + a->i[i] ; 1056 v = a->a + a->i[i] ; 1057 n = a->i[i+1] - a->i[i]; 1058 alpha1 = x[7*i]; 1059 alpha2 = x[7*i+1]; 1060 alpha3 = x[7*i+2]; 1061 alpha4 = x[7*i+3]; 1062 alpha5 = x[7*i+4]; 1063 alpha6 = x[7*i+5]; 1064 alpha7 = x[7*i+6]; 1065 while (n-->0) { 1066 y[7*(*idx)] += alpha1*(*v); 1067 y[7*(*idx)+1] += alpha2*(*v); 1068 y[7*(*idx)+2] += alpha3*(*v); 1069 y[7*(*idx)+3] += alpha4*(*v); 1070 y[7*(*idx)+4] += alpha5*(*v); 1071 y[7*(*idx)+5] += alpha6*(*v); 1072 y[7*(*idx)+6] += alpha7*(*v); 1073 idx++; v++; 1074 } 1075 } 1076 ierr = PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);CHKERRQ(ierr); 1077 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1078 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1079 PetscFunctionReturn(0); 1080 } 1081 1082 #undef __FUNCT__ 1083 #define __FUNCT__ "MatMultAdd_SeqMAIJ_7" 1084 PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1085 { 1086 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1087 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1088 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7; 1089 PetscErrorCode ierr; 1090 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1091 PetscInt n,i,jrow,j; 1092 1093 PetscFunctionBegin; 1094 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1095 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1096 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1097 idx = a->j; 1098 v = a->a; 1099 ii = a->i; 1100 1101 for (i=0; i<m; i++) { 1102 jrow = ii[i]; 1103 n = ii[i+1] - jrow; 1104 sum1 = 0.0; 1105 sum2 = 0.0; 1106 sum3 = 0.0; 1107 sum4 = 0.0; 1108 sum5 = 0.0; 1109 sum6 = 0.0; 1110 sum7 = 0.0; 1111 for (j=0; j<n; j++) { 1112 sum1 += v[jrow]*x[7*idx[jrow]]; 1113 sum2 += v[jrow]*x[7*idx[jrow]+1]; 1114 sum3 += v[jrow]*x[7*idx[jrow]+2]; 1115 sum4 += v[jrow]*x[7*idx[jrow]+3]; 1116 sum5 += v[jrow]*x[7*idx[jrow]+4]; 1117 sum6 += v[jrow]*x[7*idx[jrow]+5]; 1118 sum7 += v[jrow]*x[7*idx[jrow]+6]; 1119 jrow++; 1120 } 1121 y[7*i] += sum1; 1122 y[7*i+1] += sum2; 1123 y[7*i+2] += sum3; 1124 y[7*i+3] += sum4; 1125 y[7*i+4] += sum5; 1126 y[7*i+5] += sum6; 1127 y[7*i+6] += sum7; 1128 } 1129 1130 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1131 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1132 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 #undef __FUNCT__ 1137 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_7" 1138 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz) 1139 { 1140 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1141 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1142 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7; 1143 PetscErrorCode ierr; 1144 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1145 1146 PetscFunctionBegin; 1147 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1148 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1149 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1150 for (i=0; i<m; i++) { 1151 idx = a->j + a->i[i] ; 1152 v = a->a + a->i[i] ; 1153 n = a->i[i+1] - a->i[i]; 1154 alpha1 = x[7*i]; 1155 alpha2 = x[7*i+1]; 1156 alpha3 = x[7*i+2]; 1157 alpha4 = x[7*i+3]; 1158 alpha5 = x[7*i+4]; 1159 alpha6 = x[7*i+5]; 1160 alpha7 = x[7*i+6]; 1161 while (n-->0) { 1162 y[7*(*idx)] += alpha1*(*v); 1163 y[7*(*idx)+1] += alpha2*(*v); 1164 y[7*(*idx)+2] += alpha3*(*v); 1165 y[7*(*idx)+3] += alpha4*(*v); 1166 y[7*(*idx)+4] += alpha5*(*v); 1167 y[7*(*idx)+5] += alpha6*(*v); 1168 y[7*(*idx)+6] += alpha7*(*v); 1169 idx++; v++; 1170 } 1171 } 1172 ierr = PetscLogFlops(14*a->nz);CHKERRQ(ierr); 1173 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1174 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 1178 #undef __FUNCT__ 1179 #define __FUNCT__ "MatMult_SeqMAIJ_8" 1180 PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1181 { 1182 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1183 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1184 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1185 PetscErrorCode ierr; 1186 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1187 PetscInt n,i,jrow,j; 1188 1189 PetscFunctionBegin; 1190 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1191 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1192 idx = a->j; 1193 v = a->a; 1194 ii = a->i; 1195 1196 for (i=0; i<m; i++) { 1197 jrow = ii[i]; 1198 n = ii[i+1] - jrow; 1199 sum1 = 0.0; 1200 sum2 = 0.0; 1201 sum3 = 0.0; 1202 sum4 = 0.0; 1203 sum5 = 0.0; 1204 sum6 = 0.0; 1205 sum7 = 0.0; 1206 sum8 = 0.0; 1207 for (j=0; j<n; j++) { 1208 sum1 += v[jrow]*x[8*idx[jrow]]; 1209 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1210 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1211 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1212 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1213 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1214 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1215 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1216 jrow++; 1217 } 1218 y[8*i] = sum1; 1219 y[8*i+1] = sum2; 1220 y[8*i+2] = sum3; 1221 y[8*i+3] = sum4; 1222 y[8*i+4] = sum5; 1223 y[8*i+5] = sum6; 1224 y[8*i+6] = sum7; 1225 y[8*i+7] = sum8; 1226 } 1227 1228 ierr = PetscLogFlops(16*a->nz - 8*m);CHKERRQ(ierr); 1229 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1230 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1231 PetscFunctionReturn(0); 1232 } 1233 1234 #undef __FUNCT__ 1235 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_8" 1236 PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy) 1237 { 1238 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1239 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1240 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1241 PetscErrorCode ierr; 1242 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1243 1244 PetscFunctionBegin; 1245 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1246 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1247 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1248 1249 for (i=0; i<m; i++) { 1250 idx = a->j + a->i[i] ; 1251 v = a->a + a->i[i] ; 1252 n = a->i[i+1] - a->i[i]; 1253 alpha1 = x[8*i]; 1254 alpha2 = x[8*i+1]; 1255 alpha3 = x[8*i+2]; 1256 alpha4 = x[8*i+3]; 1257 alpha5 = x[8*i+4]; 1258 alpha6 = x[8*i+5]; 1259 alpha7 = x[8*i+6]; 1260 alpha8 = x[8*i+7]; 1261 while (n-->0) { 1262 y[8*(*idx)] += alpha1*(*v); 1263 y[8*(*idx)+1] += alpha2*(*v); 1264 y[8*(*idx)+2] += alpha3*(*v); 1265 y[8*(*idx)+3] += alpha4*(*v); 1266 y[8*(*idx)+4] += alpha5*(*v); 1267 y[8*(*idx)+5] += alpha6*(*v); 1268 y[8*(*idx)+6] += alpha7*(*v); 1269 y[8*(*idx)+7] += alpha8*(*v); 1270 idx++; v++; 1271 } 1272 } 1273 ierr = PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);CHKERRQ(ierr); 1274 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1275 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1276 PetscFunctionReturn(0); 1277 } 1278 1279 #undef __FUNCT__ 1280 #define __FUNCT__ "MatMultAdd_SeqMAIJ_8" 1281 PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1282 { 1283 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1284 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1285 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1286 PetscErrorCode ierr; 1287 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1288 PetscInt n,i,jrow,j; 1289 1290 PetscFunctionBegin; 1291 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1292 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1293 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1294 idx = a->j; 1295 v = a->a; 1296 ii = a->i; 1297 1298 for (i=0; i<m; i++) { 1299 jrow = ii[i]; 1300 n = ii[i+1] - jrow; 1301 sum1 = 0.0; 1302 sum2 = 0.0; 1303 sum3 = 0.0; 1304 sum4 = 0.0; 1305 sum5 = 0.0; 1306 sum6 = 0.0; 1307 sum7 = 0.0; 1308 sum8 = 0.0; 1309 for (j=0; j<n; j++) { 1310 sum1 += v[jrow]*x[8*idx[jrow]]; 1311 sum2 += v[jrow]*x[8*idx[jrow]+1]; 1312 sum3 += v[jrow]*x[8*idx[jrow]+2]; 1313 sum4 += v[jrow]*x[8*idx[jrow]+3]; 1314 sum5 += v[jrow]*x[8*idx[jrow]+4]; 1315 sum6 += v[jrow]*x[8*idx[jrow]+5]; 1316 sum7 += v[jrow]*x[8*idx[jrow]+6]; 1317 sum8 += v[jrow]*x[8*idx[jrow]+7]; 1318 jrow++; 1319 } 1320 y[8*i] += sum1; 1321 y[8*i+1] += sum2; 1322 y[8*i+2] += sum3; 1323 y[8*i+3] += sum4; 1324 y[8*i+4] += sum5; 1325 y[8*i+5] += sum6; 1326 y[8*i+6] += sum7; 1327 y[8*i+7] += sum8; 1328 } 1329 1330 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1331 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1332 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 #undef __FUNCT__ 1337 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_8" 1338 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz) 1339 { 1340 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1341 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1342 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 1343 PetscErrorCode ierr; 1344 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1345 1346 PetscFunctionBegin; 1347 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1348 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1349 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1350 for (i=0; i<m; i++) { 1351 idx = a->j + a->i[i] ; 1352 v = a->a + a->i[i] ; 1353 n = a->i[i+1] - a->i[i]; 1354 alpha1 = x[8*i]; 1355 alpha2 = x[8*i+1]; 1356 alpha3 = x[8*i+2]; 1357 alpha4 = x[8*i+3]; 1358 alpha5 = x[8*i+4]; 1359 alpha6 = x[8*i+5]; 1360 alpha7 = x[8*i+6]; 1361 alpha8 = x[8*i+7]; 1362 while (n-->0) { 1363 y[8*(*idx)] += alpha1*(*v); 1364 y[8*(*idx)+1] += alpha2*(*v); 1365 y[8*(*idx)+2] += alpha3*(*v); 1366 y[8*(*idx)+3] += alpha4*(*v); 1367 y[8*(*idx)+4] += alpha5*(*v); 1368 y[8*(*idx)+5] += alpha6*(*v); 1369 y[8*(*idx)+6] += alpha7*(*v); 1370 y[8*(*idx)+7] += alpha8*(*v); 1371 idx++; v++; 1372 } 1373 } 1374 ierr = PetscLogFlops(16*a->nz);CHKERRQ(ierr); 1375 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1376 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1377 PetscFunctionReturn(0); 1378 } 1379 1380 /* ------------------------------------------------------------------------------*/ 1381 #undef __FUNCT__ 1382 #define __FUNCT__ "MatMult_SeqMAIJ_9" 1383 PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1384 { 1385 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1386 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1387 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1388 PetscErrorCode ierr; 1389 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1390 PetscInt n,i,jrow,j; 1391 1392 PetscFunctionBegin; 1393 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1394 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1395 idx = a->j; 1396 v = a->a; 1397 ii = a->i; 1398 1399 for (i=0; i<m; i++) { 1400 jrow = ii[i]; 1401 n = ii[i+1] - jrow; 1402 sum1 = 0.0; 1403 sum2 = 0.0; 1404 sum3 = 0.0; 1405 sum4 = 0.0; 1406 sum5 = 0.0; 1407 sum6 = 0.0; 1408 sum7 = 0.0; 1409 sum8 = 0.0; 1410 sum9 = 0.0; 1411 for (j=0; j<n; j++) { 1412 sum1 += v[jrow]*x[9*idx[jrow]]; 1413 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1414 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1415 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1416 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1417 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1418 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1419 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1420 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1421 jrow++; 1422 } 1423 y[9*i] = sum1; 1424 y[9*i+1] = sum2; 1425 y[9*i+2] = sum3; 1426 y[9*i+3] = sum4; 1427 y[9*i+4] = sum5; 1428 y[9*i+5] = sum6; 1429 y[9*i+6] = sum7; 1430 y[9*i+7] = sum8; 1431 y[9*i+8] = sum9; 1432 } 1433 1434 ierr = PetscLogFlops(18*a->nz - 9*m);CHKERRQ(ierr); 1435 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1436 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1437 PetscFunctionReturn(0); 1438 } 1439 1440 /* ------------------------------------------------------------------------------*/ 1441 1442 #undef __FUNCT__ 1443 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_9" 1444 PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy) 1445 { 1446 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1447 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1448 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0; 1449 PetscErrorCode ierr; 1450 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1451 1452 PetscFunctionBegin; 1453 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1454 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1455 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1456 1457 for (i=0; i<m; i++) { 1458 idx = a->j + a->i[i] ; 1459 v = a->a + a->i[i] ; 1460 n = a->i[i+1] - a->i[i]; 1461 alpha1 = x[9*i]; 1462 alpha2 = x[9*i+1]; 1463 alpha3 = x[9*i+2]; 1464 alpha4 = x[9*i+3]; 1465 alpha5 = x[9*i+4]; 1466 alpha6 = x[9*i+5]; 1467 alpha7 = x[9*i+6]; 1468 alpha8 = x[9*i+7]; 1469 alpha9 = x[9*i+8]; 1470 while (n-->0) { 1471 y[9*(*idx)] += alpha1*(*v); 1472 y[9*(*idx)+1] += alpha2*(*v); 1473 y[9*(*idx)+2] += alpha3*(*v); 1474 y[9*(*idx)+3] += alpha4*(*v); 1475 y[9*(*idx)+4] += alpha5*(*v); 1476 y[9*(*idx)+5] += alpha6*(*v); 1477 y[9*(*idx)+6] += alpha7*(*v); 1478 y[9*(*idx)+7] += alpha8*(*v); 1479 y[9*(*idx)+8] += alpha9*(*v); 1480 idx++; v++; 1481 } 1482 } 1483 ierr = PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);CHKERRQ(ierr); 1484 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1485 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1486 PetscFunctionReturn(0); 1487 } 1488 1489 #undef __FUNCT__ 1490 #define __FUNCT__ "MatMultAdd_SeqMAIJ_9" 1491 PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1492 { 1493 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1494 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1495 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9; 1496 PetscErrorCode ierr; 1497 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1498 PetscInt n,i,jrow,j; 1499 1500 PetscFunctionBegin; 1501 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1502 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1503 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1504 idx = a->j; 1505 v = a->a; 1506 ii = a->i; 1507 1508 for (i=0; i<m; i++) { 1509 jrow = ii[i]; 1510 n = ii[i+1] - jrow; 1511 sum1 = 0.0; 1512 sum2 = 0.0; 1513 sum3 = 0.0; 1514 sum4 = 0.0; 1515 sum5 = 0.0; 1516 sum6 = 0.0; 1517 sum7 = 0.0; 1518 sum8 = 0.0; 1519 sum9 = 0.0; 1520 for (j=0; j<n; j++) { 1521 sum1 += v[jrow]*x[9*idx[jrow]]; 1522 sum2 += v[jrow]*x[9*idx[jrow]+1]; 1523 sum3 += v[jrow]*x[9*idx[jrow]+2]; 1524 sum4 += v[jrow]*x[9*idx[jrow]+3]; 1525 sum5 += v[jrow]*x[9*idx[jrow]+4]; 1526 sum6 += v[jrow]*x[9*idx[jrow]+5]; 1527 sum7 += v[jrow]*x[9*idx[jrow]+6]; 1528 sum8 += v[jrow]*x[9*idx[jrow]+7]; 1529 sum9 += v[jrow]*x[9*idx[jrow]+8]; 1530 jrow++; 1531 } 1532 y[9*i] += sum1; 1533 y[9*i+1] += sum2; 1534 y[9*i+2] += sum3; 1535 y[9*i+3] += sum4; 1536 y[9*i+4] += sum5; 1537 y[9*i+5] += sum6; 1538 y[9*i+6] += sum7; 1539 y[9*i+7] += sum8; 1540 y[9*i+8] += sum9; 1541 } 1542 1543 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1544 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1545 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1546 PetscFunctionReturn(0); 1547 } 1548 1549 #undef __FUNCT__ 1550 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_9" 1551 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz) 1552 { 1553 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1554 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1555 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9; 1556 PetscErrorCode ierr; 1557 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1558 1559 PetscFunctionBegin; 1560 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1561 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1562 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1563 for (i=0; i<m; i++) { 1564 idx = a->j + a->i[i] ; 1565 v = a->a + a->i[i] ; 1566 n = a->i[i+1] - a->i[i]; 1567 alpha1 = x[9*i]; 1568 alpha2 = x[9*i+1]; 1569 alpha3 = x[9*i+2]; 1570 alpha4 = x[9*i+3]; 1571 alpha5 = x[9*i+4]; 1572 alpha6 = x[9*i+5]; 1573 alpha7 = x[9*i+6]; 1574 alpha8 = x[9*i+7]; 1575 alpha9 = x[9*i+8]; 1576 while (n-->0) { 1577 y[9*(*idx)] += alpha1*(*v); 1578 y[9*(*idx)+1] += alpha2*(*v); 1579 y[9*(*idx)+2] += alpha3*(*v); 1580 y[9*(*idx)+3] += alpha4*(*v); 1581 y[9*(*idx)+4] += alpha5*(*v); 1582 y[9*(*idx)+5] += alpha6*(*v); 1583 y[9*(*idx)+6] += alpha7*(*v); 1584 y[9*(*idx)+7] += alpha8*(*v); 1585 y[9*(*idx)+8] += alpha9*(*v); 1586 idx++; v++; 1587 } 1588 } 1589 ierr = PetscLogFlops(18*a->nz);CHKERRQ(ierr); 1590 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1591 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1592 PetscFunctionReturn(0); 1593 } 1594 /*--------------------------------------------------------------------------------------------*/ 1595 #undef __FUNCT__ 1596 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1597 PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1598 { 1599 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1600 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1601 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1602 PetscErrorCode ierr; 1603 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1604 PetscInt n,i,jrow,j; 1605 1606 PetscFunctionBegin; 1607 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1608 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1609 idx = a->j; 1610 v = a->a; 1611 ii = a->i; 1612 1613 for (i=0; i<m; i++) { 1614 jrow = ii[i]; 1615 n = ii[i+1] - jrow; 1616 sum1 = 0.0; 1617 sum2 = 0.0; 1618 sum3 = 0.0; 1619 sum4 = 0.0; 1620 sum5 = 0.0; 1621 sum6 = 0.0; 1622 sum7 = 0.0; 1623 sum8 = 0.0; 1624 sum9 = 0.0; 1625 sum10 = 0.0; 1626 for (j=0; j<n; j++) { 1627 sum1 += v[jrow]*x[10*idx[jrow]]; 1628 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1629 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1630 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1631 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1632 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1633 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1634 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1635 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1636 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1637 jrow++; 1638 } 1639 y[10*i] = sum1; 1640 y[10*i+1] = sum2; 1641 y[10*i+2] = sum3; 1642 y[10*i+3] = sum4; 1643 y[10*i+4] = sum5; 1644 y[10*i+5] = sum6; 1645 y[10*i+6] = sum7; 1646 y[10*i+7] = sum8; 1647 y[10*i+8] = sum9; 1648 y[10*i+9] = sum10; 1649 } 1650 1651 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1652 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1653 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1654 PetscFunctionReturn(0); 1655 } 1656 1657 #undef __FUNCT__ 1658 #define __FUNCT__ "MatMult_SeqMAIJ_10" 1659 PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1660 { 1661 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1662 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1663 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10; 1664 PetscErrorCode ierr; 1665 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1666 PetscInt n,i,jrow,j; 1667 1668 PetscFunctionBegin; 1669 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1670 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1671 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1672 idx = a->j; 1673 v = a->a; 1674 ii = a->i; 1675 1676 for (i=0; i<m; i++) { 1677 jrow = ii[i]; 1678 n = ii[i+1] - jrow; 1679 sum1 = 0.0; 1680 sum2 = 0.0; 1681 sum3 = 0.0; 1682 sum4 = 0.0; 1683 sum5 = 0.0; 1684 sum6 = 0.0; 1685 sum7 = 0.0; 1686 sum8 = 0.0; 1687 sum9 = 0.0; 1688 sum10 = 0.0; 1689 for (j=0; j<n; j++) { 1690 sum1 += v[jrow]*x[10*idx[jrow]]; 1691 sum2 += v[jrow]*x[10*idx[jrow]+1]; 1692 sum3 += v[jrow]*x[10*idx[jrow]+2]; 1693 sum4 += v[jrow]*x[10*idx[jrow]+3]; 1694 sum5 += v[jrow]*x[10*idx[jrow]+4]; 1695 sum6 += v[jrow]*x[10*idx[jrow]+5]; 1696 sum7 += v[jrow]*x[10*idx[jrow]+6]; 1697 sum8 += v[jrow]*x[10*idx[jrow]+7]; 1698 sum9 += v[jrow]*x[10*idx[jrow]+8]; 1699 sum10 += v[jrow]*x[10*idx[jrow]+9]; 1700 jrow++; 1701 } 1702 y[10*i] += sum1; 1703 y[10*i+1] += sum2; 1704 y[10*i+2] += sum3; 1705 y[10*i+3] += sum4; 1706 y[10*i+4] += sum5; 1707 y[10*i+5] += sum6; 1708 y[10*i+6] += sum7; 1709 y[10*i+7] += sum8; 1710 y[10*i+8] += sum9; 1711 y[10*i+9] += sum10; 1712 } 1713 1714 ierr = PetscLogFlops(20*a->nz - 10*m);CHKERRQ(ierr); 1715 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1716 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1717 PetscFunctionReturn(0); 1718 } 1719 1720 #undef __FUNCT__ 1721 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_10" 1722 PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy) 1723 { 1724 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1725 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1726 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0; 1727 PetscErrorCode ierr; 1728 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1729 1730 PetscFunctionBegin; 1731 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1732 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1733 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1734 1735 for (i=0; i<m; i++) { 1736 idx = a->j + a->i[i] ; 1737 v = a->a + a->i[i] ; 1738 n = a->i[i+1] - a->i[i]; 1739 alpha1 = x[10*i]; 1740 alpha2 = x[10*i+1]; 1741 alpha3 = x[10*i+2]; 1742 alpha4 = x[10*i+3]; 1743 alpha5 = x[10*i+4]; 1744 alpha6 = x[10*i+5]; 1745 alpha7 = x[10*i+6]; 1746 alpha8 = x[10*i+7]; 1747 alpha9 = x[10*i+8]; 1748 alpha10 = x[10*i+9]; 1749 while (n-->0) { 1750 y[10*(*idx)] += alpha1*(*v); 1751 y[10*(*idx)+1] += alpha2*(*v); 1752 y[10*(*idx)+2] += alpha3*(*v); 1753 y[10*(*idx)+3] += alpha4*(*v); 1754 y[10*(*idx)+4] += alpha5*(*v); 1755 y[10*(*idx)+5] += alpha6*(*v); 1756 y[10*(*idx)+6] += alpha7*(*v); 1757 y[10*(*idx)+7] += alpha8*(*v); 1758 y[10*(*idx)+8] += alpha9*(*v); 1759 y[10*(*idx)+9] += alpha10*(*v); 1760 idx++; v++; 1761 } 1762 } 1763 ierr = PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);CHKERRQ(ierr); 1764 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1765 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1766 PetscFunctionReturn(0); 1767 } 1768 1769 #undef __FUNCT__ 1770 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_10" 1771 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz) 1772 { 1773 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1774 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1775 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10; 1776 PetscErrorCode ierr; 1777 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1778 1779 PetscFunctionBegin; 1780 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1781 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1782 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1783 for (i=0; i<m; i++) { 1784 idx = a->j + a->i[i] ; 1785 v = a->a + a->i[i] ; 1786 n = a->i[i+1] - a->i[i]; 1787 alpha1 = x[10*i]; 1788 alpha2 = x[10*i+1]; 1789 alpha3 = x[10*i+2]; 1790 alpha4 = x[10*i+3]; 1791 alpha5 = x[10*i+4]; 1792 alpha6 = x[10*i+5]; 1793 alpha7 = x[10*i+6]; 1794 alpha8 = x[10*i+7]; 1795 alpha9 = x[10*i+8]; 1796 alpha10 = x[10*i+9]; 1797 while (n-->0) { 1798 y[10*(*idx)] += alpha1*(*v); 1799 y[10*(*idx)+1] += alpha2*(*v); 1800 y[10*(*idx)+2] += alpha3*(*v); 1801 y[10*(*idx)+3] += alpha4*(*v); 1802 y[10*(*idx)+4] += alpha5*(*v); 1803 y[10*(*idx)+5] += alpha6*(*v); 1804 y[10*(*idx)+6] += alpha7*(*v); 1805 y[10*(*idx)+7] += alpha8*(*v); 1806 y[10*(*idx)+8] += alpha9*(*v); 1807 y[10*(*idx)+9] += alpha10*(*v); 1808 idx++; v++; 1809 } 1810 } 1811 ierr = PetscLogFlops(20*a->nz);CHKERRQ(ierr); 1812 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1813 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 1814 PetscFunctionReturn(0); 1815 } 1816 1817 1818 /*--------------------------------------------------------------------------------------------*/ 1819 #undef __FUNCT__ 1820 #define __FUNCT__ "MatMult_SeqMAIJ_16" 1821 PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1822 { 1823 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1824 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1825 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1826 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1827 PetscErrorCode ierr; 1828 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1829 PetscInt n,i,jrow,j; 1830 1831 PetscFunctionBegin; 1832 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1833 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1834 idx = a->j; 1835 v = a->a; 1836 ii = a->i; 1837 1838 for (i=0; i<m; i++) { 1839 jrow = ii[i]; 1840 n = ii[i+1] - jrow; 1841 sum1 = 0.0; 1842 sum2 = 0.0; 1843 sum3 = 0.0; 1844 sum4 = 0.0; 1845 sum5 = 0.0; 1846 sum6 = 0.0; 1847 sum7 = 0.0; 1848 sum8 = 0.0; 1849 sum9 = 0.0; 1850 sum10 = 0.0; 1851 sum11 = 0.0; 1852 sum12 = 0.0; 1853 sum13 = 0.0; 1854 sum14 = 0.0; 1855 sum15 = 0.0; 1856 sum16 = 0.0; 1857 for (j=0; j<n; j++) { 1858 sum1 += v[jrow]*x[16*idx[jrow]]; 1859 sum2 += v[jrow]*x[16*idx[jrow]+1]; 1860 sum3 += v[jrow]*x[16*idx[jrow]+2]; 1861 sum4 += v[jrow]*x[16*idx[jrow]+3]; 1862 sum5 += v[jrow]*x[16*idx[jrow]+4]; 1863 sum6 += v[jrow]*x[16*idx[jrow]+5]; 1864 sum7 += v[jrow]*x[16*idx[jrow]+6]; 1865 sum8 += v[jrow]*x[16*idx[jrow]+7]; 1866 sum9 += v[jrow]*x[16*idx[jrow]+8]; 1867 sum10 += v[jrow]*x[16*idx[jrow]+9]; 1868 sum11 += v[jrow]*x[16*idx[jrow]+10]; 1869 sum12 += v[jrow]*x[16*idx[jrow]+11]; 1870 sum13 += v[jrow]*x[16*idx[jrow]+12]; 1871 sum14 += v[jrow]*x[16*idx[jrow]+13]; 1872 sum15 += v[jrow]*x[16*idx[jrow]+14]; 1873 sum16 += v[jrow]*x[16*idx[jrow]+15]; 1874 jrow++; 1875 } 1876 y[16*i] = sum1; 1877 y[16*i+1] = sum2; 1878 y[16*i+2] = sum3; 1879 y[16*i+3] = sum4; 1880 y[16*i+4] = sum5; 1881 y[16*i+5] = sum6; 1882 y[16*i+6] = sum7; 1883 y[16*i+7] = sum8; 1884 y[16*i+8] = sum9; 1885 y[16*i+9] = sum10; 1886 y[16*i+10] = sum11; 1887 y[16*i+11] = sum12; 1888 y[16*i+12] = sum13; 1889 y[16*i+13] = sum14; 1890 y[16*i+14] = sum15; 1891 y[16*i+15] = sum16; 1892 } 1893 1894 ierr = PetscLogFlops(32*a->nz - 16*m);CHKERRQ(ierr); 1895 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1896 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1897 PetscFunctionReturn(0); 1898 } 1899 1900 #undef __FUNCT__ 1901 #define __FUNCT__ "MatMultTranspose_SeqMAIJ_16" 1902 PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy) 1903 { 1904 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1905 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1906 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0; 1907 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 1908 PetscErrorCode ierr; 1909 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 1910 1911 PetscFunctionBegin; 1912 ierr = VecSet(yy,zero);CHKERRQ(ierr); 1913 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1914 ierr = VecGetArray(yy,&y);CHKERRQ(ierr); 1915 1916 for (i=0; i<m; i++) { 1917 idx = a->j + a->i[i] ; 1918 v = a->a + a->i[i] ; 1919 n = a->i[i+1] - a->i[i]; 1920 alpha1 = x[16*i]; 1921 alpha2 = x[16*i+1]; 1922 alpha3 = x[16*i+2]; 1923 alpha4 = x[16*i+3]; 1924 alpha5 = x[16*i+4]; 1925 alpha6 = x[16*i+5]; 1926 alpha7 = x[16*i+6]; 1927 alpha8 = x[16*i+7]; 1928 alpha9 = x[16*i+8]; 1929 alpha10 = x[16*i+9]; 1930 alpha11 = x[16*i+10]; 1931 alpha12 = x[16*i+11]; 1932 alpha13 = x[16*i+12]; 1933 alpha14 = x[16*i+13]; 1934 alpha15 = x[16*i+14]; 1935 alpha16 = x[16*i+15]; 1936 while (n-->0) { 1937 y[16*(*idx)] += alpha1*(*v); 1938 y[16*(*idx)+1] += alpha2*(*v); 1939 y[16*(*idx)+2] += alpha3*(*v); 1940 y[16*(*idx)+3] += alpha4*(*v); 1941 y[16*(*idx)+4] += alpha5*(*v); 1942 y[16*(*idx)+5] += alpha6*(*v); 1943 y[16*(*idx)+6] += alpha7*(*v); 1944 y[16*(*idx)+7] += alpha8*(*v); 1945 y[16*(*idx)+8] += alpha9*(*v); 1946 y[16*(*idx)+9] += alpha10*(*v); 1947 y[16*(*idx)+10] += alpha11*(*v); 1948 y[16*(*idx)+11] += alpha12*(*v); 1949 y[16*(*idx)+12] += alpha13*(*v); 1950 y[16*(*idx)+13] += alpha14*(*v); 1951 y[16*(*idx)+14] += alpha15*(*v); 1952 y[16*(*idx)+15] += alpha16*(*v); 1953 idx++; v++; 1954 } 1955 } 1956 ierr = PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);CHKERRQ(ierr); 1957 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 1958 ierr = VecRestoreArray(yy,&y);CHKERRQ(ierr); 1959 PetscFunctionReturn(0); 1960 } 1961 1962 #undef __FUNCT__ 1963 #define __FUNCT__ "MatMultAdd_SeqMAIJ_16" 1964 PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 1965 { 1966 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 1967 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 1968 PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8; 1969 PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16; 1970 PetscErrorCode ierr; 1971 PetscInt m = b->AIJ->rmap.n,*idx,*ii; 1972 PetscInt n,i,jrow,j; 1973 1974 PetscFunctionBegin; 1975 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 1976 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 1977 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 1978 idx = a->j; 1979 v = a->a; 1980 ii = a->i; 1981 1982 for (i=0; i<m; i++) { 1983 jrow = ii[i]; 1984 n = ii[i+1] - jrow; 1985 sum1 = 0.0; 1986 sum2 = 0.0; 1987 sum3 = 0.0; 1988 sum4 = 0.0; 1989 sum5 = 0.0; 1990 sum6 = 0.0; 1991 sum7 = 0.0; 1992 sum8 = 0.0; 1993 sum9 = 0.0; 1994 sum10 = 0.0; 1995 sum11 = 0.0; 1996 sum12 = 0.0; 1997 sum13 = 0.0; 1998 sum14 = 0.0; 1999 sum15 = 0.0; 2000 sum16 = 0.0; 2001 for (j=0; j<n; j++) { 2002 sum1 += v[jrow]*x[16*idx[jrow]]; 2003 sum2 += v[jrow]*x[16*idx[jrow]+1]; 2004 sum3 += v[jrow]*x[16*idx[jrow]+2]; 2005 sum4 += v[jrow]*x[16*idx[jrow]+3]; 2006 sum5 += v[jrow]*x[16*idx[jrow]+4]; 2007 sum6 += v[jrow]*x[16*idx[jrow]+5]; 2008 sum7 += v[jrow]*x[16*idx[jrow]+6]; 2009 sum8 += v[jrow]*x[16*idx[jrow]+7]; 2010 sum9 += v[jrow]*x[16*idx[jrow]+8]; 2011 sum10 += v[jrow]*x[16*idx[jrow]+9]; 2012 sum11 += v[jrow]*x[16*idx[jrow]+10]; 2013 sum12 += v[jrow]*x[16*idx[jrow]+11]; 2014 sum13 += v[jrow]*x[16*idx[jrow]+12]; 2015 sum14 += v[jrow]*x[16*idx[jrow]+13]; 2016 sum15 += v[jrow]*x[16*idx[jrow]+14]; 2017 sum16 += v[jrow]*x[16*idx[jrow]+15]; 2018 jrow++; 2019 } 2020 y[16*i] += sum1; 2021 y[16*i+1] += sum2; 2022 y[16*i+2] += sum3; 2023 y[16*i+3] += sum4; 2024 y[16*i+4] += sum5; 2025 y[16*i+5] += sum6; 2026 y[16*i+6] += sum7; 2027 y[16*i+7] += sum8; 2028 y[16*i+8] += sum9; 2029 y[16*i+9] += sum10; 2030 y[16*i+10] += sum11; 2031 y[16*i+11] += sum12; 2032 y[16*i+12] += sum13; 2033 y[16*i+13] += sum14; 2034 y[16*i+14] += sum15; 2035 y[16*i+15] += sum16; 2036 } 2037 2038 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2039 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2040 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2041 PetscFunctionReturn(0); 2042 } 2043 2044 #undef __FUNCT__ 2045 #define __FUNCT__ "MatMultTransposeAdd_SeqMAIJ_16" 2046 PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz) 2047 { 2048 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2049 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 2050 PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8; 2051 PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16; 2052 PetscErrorCode ierr; 2053 PetscInt m = b->AIJ->rmap.n,n,i,*idx; 2054 2055 PetscFunctionBegin; 2056 if (yy != zz) {ierr = VecCopy(yy,zz);CHKERRQ(ierr);} 2057 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 2058 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 2059 for (i=0; i<m; i++) { 2060 idx = a->j + a->i[i] ; 2061 v = a->a + a->i[i] ; 2062 n = a->i[i+1] - a->i[i]; 2063 alpha1 = x[16*i]; 2064 alpha2 = x[16*i+1]; 2065 alpha3 = x[16*i+2]; 2066 alpha4 = x[16*i+3]; 2067 alpha5 = x[16*i+4]; 2068 alpha6 = x[16*i+5]; 2069 alpha7 = x[16*i+6]; 2070 alpha8 = x[16*i+7]; 2071 alpha9 = x[16*i+8]; 2072 alpha10 = x[16*i+9]; 2073 alpha11 = x[16*i+10]; 2074 alpha12 = x[16*i+11]; 2075 alpha13 = x[16*i+12]; 2076 alpha14 = x[16*i+13]; 2077 alpha15 = x[16*i+14]; 2078 alpha16 = x[16*i+15]; 2079 while (n-->0) { 2080 y[16*(*idx)] += alpha1*(*v); 2081 y[16*(*idx)+1] += alpha2*(*v); 2082 y[16*(*idx)+2] += alpha3*(*v); 2083 y[16*(*idx)+3] += alpha4*(*v); 2084 y[16*(*idx)+4] += alpha5*(*v); 2085 y[16*(*idx)+5] += alpha6*(*v); 2086 y[16*(*idx)+6] += alpha7*(*v); 2087 y[16*(*idx)+7] += alpha8*(*v); 2088 y[16*(*idx)+8] += alpha9*(*v); 2089 y[16*(*idx)+9] += alpha10*(*v); 2090 y[16*(*idx)+10] += alpha11*(*v); 2091 y[16*(*idx)+11] += alpha12*(*v); 2092 y[16*(*idx)+12] += alpha13*(*v); 2093 y[16*(*idx)+13] += alpha14*(*v); 2094 y[16*(*idx)+14] += alpha15*(*v); 2095 y[16*(*idx)+15] += alpha16*(*v); 2096 idx++; v++; 2097 } 2098 } 2099 ierr = PetscLogFlops(32*a->nz);CHKERRQ(ierr); 2100 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 2101 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 2102 PetscFunctionReturn(0); 2103 } 2104 2105 /*===================================================================================*/ 2106 #undef __FUNCT__ 2107 #define __FUNCT__ "MatMult_MPIMAIJ_dof" 2108 PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2109 { 2110 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2111 PetscErrorCode ierr; 2112 2113 PetscFunctionBegin; 2114 /* start the scatter */ 2115 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2116 ierr = (*b->AIJ->ops->mult)(b->AIJ,xx,yy);CHKERRQ(ierr); 2117 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2118 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);CHKERRQ(ierr); 2119 PetscFunctionReturn(0); 2120 } 2121 2122 #undef __FUNCT__ 2123 #define __FUNCT__ "MatMultTranspose_MPIMAIJ_dof" 2124 PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy) 2125 { 2126 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2127 PetscErrorCode ierr; 2128 2129 PetscFunctionBegin; 2130 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2131 ierr = (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);CHKERRQ(ierr); 2132 ierr = VecScatterBegin(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2133 ierr = VecScatterEnd(b->ctx,b->w,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2134 PetscFunctionReturn(0); 2135 } 2136 2137 #undef __FUNCT__ 2138 #define __FUNCT__ "MatMultAdd_MPIMAIJ_dof" 2139 PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2140 { 2141 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 /* start the scatter */ 2146 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2147 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2148 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 2149 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatMultTransposeAdd_MPIMAIJ_dof" 2155 PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz) 2156 { 2157 Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data; 2158 PetscErrorCode ierr; 2159 2160 PetscFunctionBegin; 2161 ierr = (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);CHKERRQ(ierr); 2162 ierr = VecScatterBegin(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2163 ierr = (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);CHKERRQ(ierr); 2164 ierr = VecScatterEnd(b->ctx,b->w,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 2165 PetscFunctionReturn(0); 2166 } 2167 2168 /* ----------------------------------------------------------------*/ 2169 #undef __FUNCT__ 2170 #define __FUNCT__ "MatPtAPSymbolic_SeqAIJ_SeqMAIJ" 2171 PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2172 { 2173 /* This routine requires testing -- but it's getting better. */ 2174 PetscErrorCode ierr; 2175 PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL; 2176 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2177 Mat P=pp->AIJ; 2178 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c; 2179 PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj; 2180 PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj; 2181 PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn; 2182 PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi; 2183 MatScalar *ca; 2184 2185 PetscFunctionBegin; 2186 /* Start timer */ 2187 ierr = PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2188 2189 /* Get ij structure of P^T */ 2190 ierr = MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2191 2192 cn = pn*ppdof; 2193 /* Allocate ci array, arrays for fill computation and */ 2194 /* free space for accumulating nonzero column info */ 2195 ierr = PetscMalloc((cn+1)*sizeof(PetscInt),&ci);CHKERRQ(ierr); 2196 ci[0] = 0; 2197 2198 /* Work arrays for rows of P^T*A */ 2199 ierr = PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);CHKERRQ(ierr); 2200 ierr = PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));CHKERRQ(ierr); 2201 ptasparserow = ptadenserow + an; 2202 denserow = ptasparserow + an; 2203 sparserow = denserow + cn; 2204 2205 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 2206 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 2207 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 2208 ierr = PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space); 2209 current_space = free_space; 2210 2211 /* Determine symbolic info for each row of C: */ 2212 for (i=0;i<pn;i++) { 2213 ptnzi = pti[i+1] - pti[i]; 2214 ptJ = ptj + pti[i]; 2215 for (dof=0;dof<ppdof;dof++) { 2216 ptanzi = 0; 2217 /* Determine symbolic row of PtA: */ 2218 for (j=0;j<ptnzi;j++) { 2219 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 2220 arow = ptJ[j]*ppdof + dof; 2221 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 2222 anzj = ai[arow+1] - ai[arow]; 2223 ajj = aj + ai[arow]; 2224 for (k=0;k<anzj;k++) { 2225 if (!ptadenserow[ajj[k]]) { 2226 ptadenserow[ajj[k]] = -1; 2227 ptasparserow[ptanzi++] = ajj[k]; 2228 } 2229 } 2230 } 2231 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 2232 ptaj = ptasparserow; 2233 cnzi = 0; 2234 for (j=0;j<ptanzi;j++) { 2235 /* Get offset within block of P */ 2236 pshift = *ptaj%ppdof; 2237 /* Get block row of P */ 2238 prow = (*ptaj++)/ppdof; /* integer division */ 2239 /* P has same number of nonzeros per row as the compressed form */ 2240 pnzj = pi[prow+1] - pi[prow]; 2241 pjj = pj + pi[prow]; 2242 for (k=0;k<pnzj;k++) { 2243 /* Locations in C are shifted by the offset within the block */ 2244 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 2245 if (!denserow[pjj[k]*ppdof+pshift]) { 2246 denserow[pjj[k]*ppdof+pshift] = -1; 2247 sparserow[cnzi++] = pjj[k]*ppdof+pshift; 2248 } 2249 } 2250 } 2251 2252 /* sort sparserow */ 2253 ierr = PetscSortInt(cnzi,sparserow);CHKERRQ(ierr); 2254 2255 /* If free space is not available, make more free space */ 2256 /* Double the amount of total space in the list */ 2257 if (current_space->local_remaining<cnzi) { 2258 ierr = PetscFreeSpaceGet(current_space->total_array_size,¤t_space);CHKERRQ(ierr); 2259 } 2260 2261 /* Copy data into free space, and zero out denserows */ 2262 ierr = PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));CHKERRQ(ierr); 2263 current_space->array += cnzi; 2264 current_space->local_used += cnzi; 2265 current_space->local_remaining -= cnzi; 2266 2267 for (j=0;j<ptanzi;j++) { 2268 ptadenserow[ptasparserow[j]] = 0; 2269 } 2270 for (j=0;j<cnzi;j++) { 2271 denserow[sparserow[j]] = 0; 2272 } 2273 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 2274 /* For now, we will recompute what is needed. */ 2275 ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi; 2276 } 2277 } 2278 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 2279 /* Allocate space for cj, initialize cj, and */ 2280 /* destroy list of free space and other temporary array(s) */ 2281 ierr = PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);CHKERRQ(ierr); 2282 ierr = PetscFreeSpaceContiguous(&free_space,cj);CHKERRQ(ierr); 2283 ierr = PetscFree(ptadenserow);CHKERRQ(ierr); 2284 2285 /* Allocate space for ca */ 2286 ierr = PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);CHKERRQ(ierr); 2287 ierr = PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));CHKERRQ(ierr); 2288 2289 /* put together the new matrix */ 2290 ierr = MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);CHKERRQ(ierr); 2291 2292 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 2293 /* Since these are PETSc arrays, change flags to free them as necessary. */ 2294 c = (Mat_SeqAIJ *)((*C)->data); 2295 c->free_a = PETSC_TRUE; 2296 c->free_ij = PETSC_TRUE; 2297 c->nonew = 0; 2298 2299 /* Clean up. */ 2300 ierr = MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);CHKERRQ(ierr); 2301 2302 ierr = PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);CHKERRQ(ierr); 2303 PetscFunctionReturn(0); 2304 } 2305 2306 #undef __FUNCT__ 2307 #define __FUNCT__ "MatPtAPNumeric_SeqAIJ_SeqMAIJ" 2308 PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C) 2309 { 2310 /* This routine requires testing -- first draft only */ 2311 PetscErrorCode ierr; 2312 PetscInt flops=0; 2313 Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data; 2314 Mat P=pp->AIJ; 2315 Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data; 2316 Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data; 2317 Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data; 2318 PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj; 2319 PetscInt *ci=c->i,*cj=c->j,*cjj; 2320 PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof; 2321 PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow; 2322 MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj; 2323 2324 PetscFunctionBegin; 2325 /* Allocate temporary array for storage of one row of A*P */ 2326 ierr = PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);CHKERRQ(ierr); 2327 ierr = PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));CHKERRQ(ierr); 2328 2329 apj = (PetscInt *)(apa + cn); 2330 apjdense = apj + cn; 2331 2332 /* Clear old values in C */ 2333 ierr = PetscMemzero(ca,ci[cm]*sizeof(MatScalar));CHKERRQ(ierr); 2334 2335 for (i=0;i<am;i++) { 2336 /* Form sparse row of A*P */ 2337 anzi = ai[i+1] - ai[i]; 2338 apnzj = 0; 2339 for (j=0;j<anzi;j++) { 2340 /* Get offset within block of P */ 2341 pshift = *aj%ppdof; 2342 /* Get block row of P */ 2343 prow = *aj++/ppdof; /* integer division */ 2344 pnzj = pi[prow+1] - pi[prow]; 2345 pjj = pj + pi[prow]; 2346 paj = pa + pi[prow]; 2347 for (k=0;k<pnzj;k++) { 2348 poffset = pjj[k]*ppdof+pshift; 2349 if (!apjdense[poffset]) { 2350 apjdense[poffset] = -1; 2351 apj[apnzj++] = poffset; 2352 } 2353 apa[poffset] += (*aa)*paj[k]; 2354 } 2355 flops += 2*pnzj; 2356 aa++; 2357 } 2358 2359 /* Sort the j index array for quick sparse axpy. */ 2360 /* Note: a array does not need sorting as it is in dense storage locations. */ 2361 ierr = PetscSortInt(apnzj,apj);CHKERRQ(ierr); 2362 2363 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 2364 prow = i/ppdof; /* integer division */ 2365 pshift = i%ppdof; 2366 poffset = pi[prow]; 2367 pnzi = pi[prow+1] - poffset; 2368 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 2369 pJ = pj+poffset; 2370 pA = pa+poffset; 2371 for (j=0;j<pnzi;j++) { 2372 crow = (*pJ)*ppdof+pshift; 2373 cjj = cj + ci[crow]; 2374 caj = ca + ci[crow]; 2375 pJ++; 2376 /* Perform sparse axpy operation. Note cjj includes apj. */ 2377 for (k=0,nextap=0;nextap<apnzj;k++) { 2378 if (cjj[k]==apj[nextap]) { 2379 caj[k] += (*pA)*apa[apj[nextap++]]; 2380 } 2381 } 2382 flops += 2*apnzj; 2383 pA++; 2384 } 2385 2386 /* Zero the current row info for A*P */ 2387 for (j=0;j<apnzj;j++) { 2388 apa[apj[j]] = 0.; 2389 apjdense[apj[j]] = 0; 2390 } 2391 } 2392 2393 /* Assemble the final matrix and clean up */ 2394 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2395 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2396 ierr = PetscFree(apa);CHKERRQ(ierr); 2397 ierr = PetscLogFlops(flops);CHKERRQ(ierr); 2398 PetscFunctionReturn(0); 2399 } 2400 2401 #undef __FUNCT__ 2402 #define __FUNCT__ "MatPtAPSymbolic_MPIAIJ_MPIMAIJ" 2403 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C) 2404 { 2405 PetscErrorCode ierr; 2406 2407 PetscFunctionBegin; 2408 /* MatPtAPSymbolic_MPIAIJ_MPIMAIJ() is not implemented yet. Convert PP to mpiaij format */ 2409 ierr = MatConvert(PP,MATMPIAIJ,MAT_REUSE_MATRIX,&PP);CHKERRQ(ierr); 2410 ierr =(*PP->ops->ptapsymbolic)(A,PP,fill,C);CHKERRQ(ierr); 2411 PetscFunctionReturn(0); 2412 } 2413 2414 #undef __FUNCT__ 2415 #define __FUNCT__ "MatPtAPNumeric_MPIAIJ_MPIMAIJ" 2416 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ(Mat A,Mat PP,Mat C) 2417 { 2418 PetscFunctionBegin; 2419 SETERRQ(PETSC_ERR_SUP,"MatPtAPNumeric is not implemented for MPIMAIJ matrix yet"); 2420 PetscFunctionReturn(0); 2421 } 2422 2423 EXTERN_C_BEGIN 2424 #undef __FUNCT__ 2425 #define __FUNCT__ "MatConvert_SeqMAIJ_SeqAIJ" 2426 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2427 { 2428 Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data; 2429 Mat a = b->AIJ,B; 2430 Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data; 2431 PetscErrorCode ierr; 2432 PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof; 2433 PetscInt *cols; 2434 PetscScalar *vals; 2435 2436 PetscFunctionBegin; 2437 ierr = MatGetSize(a,&m,&n);CHKERRQ(ierr); 2438 ierr = PetscMalloc(dof*m*sizeof(PetscInt),&ilen);CHKERRQ(ierr); 2439 for (i=0; i<m; i++) { 2440 nmax = PetscMax(nmax,aij->ilen[i]); 2441 for (j=0; j<dof; j++) { 2442 ilen[dof*i+j] = aij->ilen[i]; 2443 } 2444 } 2445 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);CHKERRQ(ierr); 2446 ierr = PetscFree(ilen);CHKERRQ(ierr); 2447 ierr = PetscMalloc(nmax*sizeof(PetscInt),&icols);CHKERRQ(ierr); 2448 ii = 0; 2449 for (i=0; i<m; i++) { 2450 ierr = MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2451 for (j=0; j<dof; j++) { 2452 for (k=0; k<ncols; k++) { 2453 icols[k] = dof*cols[k]+j; 2454 } 2455 ierr = MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2456 ii++; 2457 } 2458 ierr = MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2459 } 2460 ierr = PetscFree(icols);CHKERRQ(ierr); 2461 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2462 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2463 2464 if (reuse == MAT_REUSE_MATRIX) { 2465 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2466 } else { 2467 *newmat = B; 2468 } 2469 PetscFunctionReturn(0); 2470 } 2471 EXTERN_C_END 2472 2473 #include "src/mat/impls/aij/mpi/mpiaij.h" 2474 2475 EXTERN_C_BEGIN 2476 #undef __FUNCT__ 2477 #define __FUNCT__ "MatConvert_MPIMAIJ_MPIAIJ" 2478 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 2479 { 2480 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data; 2481 Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B; 2482 Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ; 2483 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data; 2484 Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data; 2485 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data; 2486 PetscInt dof = maij->dof,i,j,*dnz = PETSC_NULL,*onz = PETSC_NULL,nmax = 0,onmax = 0; 2487 PetscInt *oicols = PETSC_NULL,*icols = PETSC_NULL,ncols,*cols = PETSC_NULL,oncols,*ocols = PETSC_NULL; 2488 PetscInt rstart,cstart,*garray,ii,k; 2489 PetscErrorCode ierr; 2490 PetscScalar *vals,*ovals; 2491 2492 PetscFunctionBegin; 2493 ierr = PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);CHKERRQ(ierr); 2494 for (i=0; i<A->rmap.n/dof; i++) { 2495 nmax = PetscMax(nmax,AIJ->ilen[i]); 2496 onmax = PetscMax(onmax,OAIJ->ilen[i]); 2497 for (j=0; j<dof; j++) { 2498 dnz[dof*i+j] = AIJ->ilen[i]; 2499 onz[dof*i+j] = OAIJ->ilen[i]; 2500 } 2501 } 2502 ierr = MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);CHKERRQ(ierr); 2503 ierr = PetscFree2(dnz,onz);CHKERRQ(ierr); 2504 2505 ierr = PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);CHKERRQ(ierr); 2506 rstart = dof*maij->A->rmap.rstart; 2507 cstart = dof*maij->A->cmap.rstart; 2508 garray = mpiaij->garray; 2509 2510 ii = rstart; 2511 for (i=0; i<A->rmap.n/dof; i++) { 2512 ierr = MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2513 ierr = MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2514 for (j=0; j<dof; j++) { 2515 for (k=0; k<ncols; k++) { 2516 icols[k] = cstart + dof*cols[k]+j; 2517 } 2518 for (k=0; k<oncols; k++) { 2519 oicols[k] = dof*garray[ocols[k]]+j; 2520 } 2521 ierr = MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);CHKERRQ(ierr); 2522 ierr = MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);CHKERRQ(ierr); 2523 ii++; 2524 } 2525 ierr = MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);CHKERRQ(ierr); 2526 ierr = MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);CHKERRQ(ierr); 2527 } 2528 ierr = PetscFree2(icols,oicols);CHKERRQ(ierr); 2529 2530 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2531 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2532 2533 if (reuse == MAT_REUSE_MATRIX) { 2534 PetscInt refct = A->refct; /* save A->refct */ 2535 A->refct = 1; 2536 ierr = MatHeaderReplace(A,B);CHKERRQ(ierr); 2537 A->refct = refct; /* restore A->refct */ 2538 } else { 2539 *newmat = B; 2540 } 2541 PetscFunctionReturn(0); 2542 } 2543 EXTERN_C_END 2544 2545 2546 /* ---------------------------------------------------------------------------------- */ 2547 /*MC 2548 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 2549 operations for multicomponent problems. It interpolates each component the same 2550 way independently. The matrix type is based on MATSEQAIJ for sequential matrices, 2551 and MATMPIAIJ for distributed matrices. 2552 2553 Operations provided: 2554 + MatMult 2555 . MatMultTranspose 2556 . MatMultAdd 2557 . MatMultTransposeAdd 2558 - MatView 2559 2560 Level: advanced 2561 2562 M*/ 2563 #undef __FUNCT__ 2564 #define __FUNCT__ "MatCreateMAIJ" 2565 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij) 2566 { 2567 PetscErrorCode ierr; 2568 PetscMPIInt size; 2569 PetscInt n; 2570 Mat_MPIMAIJ *b; 2571 Mat B; 2572 2573 PetscFunctionBegin; 2574 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2575 2576 if (dof == 1) { 2577 *maij = A; 2578 } else { 2579 ierr = MatCreate(A->comm,&B);CHKERRQ(ierr); 2580 ierr = MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);CHKERRQ(ierr); 2581 B->assembled = PETSC_TRUE; 2582 2583 ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr); 2584 if (size == 1) { 2585 ierr = MatSetType(B,MATSEQMAIJ);CHKERRQ(ierr); 2586 B->ops->destroy = MatDestroy_SeqMAIJ; 2587 B->ops->view = MatView_SeqMAIJ; 2588 b = (Mat_MPIMAIJ*)B->data; 2589 b->dof = dof; 2590 b->AIJ = A; 2591 if (dof == 2) { 2592 B->ops->mult = MatMult_SeqMAIJ_2; 2593 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 2594 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 2595 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 2596 } else if (dof == 3) { 2597 B->ops->mult = MatMult_SeqMAIJ_3; 2598 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 2599 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 2600 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 2601 } else if (dof == 4) { 2602 B->ops->mult = MatMult_SeqMAIJ_4; 2603 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 2604 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 2605 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 2606 } else if (dof == 5) { 2607 B->ops->mult = MatMult_SeqMAIJ_5; 2608 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 2609 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 2610 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 2611 } else if (dof == 6) { 2612 B->ops->mult = MatMult_SeqMAIJ_6; 2613 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 2614 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 2615 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 2616 } else if (dof == 7) { 2617 B->ops->mult = MatMult_SeqMAIJ_7; 2618 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 2619 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 2620 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 2621 } else if (dof == 8) { 2622 B->ops->mult = MatMult_SeqMAIJ_8; 2623 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 2624 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 2625 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 2626 } else if (dof == 9) { 2627 B->ops->mult = MatMult_SeqMAIJ_9; 2628 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 2629 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 2630 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 2631 } else if (dof == 10) { 2632 B->ops->mult = MatMult_SeqMAIJ_10; 2633 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 2634 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 2635 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 2636 } else if (dof == 16) { 2637 B->ops->mult = MatMult_SeqMAIJ_16; 2638 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 2639 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 2640 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 2641 } else { 2642 SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof); 2643 } 2644 B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ; 2645 B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 2646 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);CHKERRQ(ierr); 2647 } else { 2648 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 2649 IS from,to; 2650 Vec gvec; 2651 PetscInt *garray,i; 2652 2653 ierr = MatSetType(B,MATMPIMAIJ);CHKERRQ(ierr); 2654 B->ops->destroy = MatDestroy_MPIMAIJ; 2655 B->ops->view = MatView_MPIMAIJ; 2656 b = (Mat_MPIMAIJ*)B->data; 2657 b->dof = dof; 2658 b->A = A; 2659 ierr = MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);CHKERRQ(ierr); 2660 ierr = MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);CHKERRQ(ierr); 2661 2662 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 2663 ierr = VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);CHKERRQ(ierr); 2664 ierr = VecSetBlockSize(b->w,dof);CHKERRQ(ierr); 2665 2666 /* create two temporary Index sets for build scatter gather */ 2667 ierr = PetscMalloc((n+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 2668 for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i]; 2669 ierr = ISCreateBlock(A->comm,dof,n,garray,&from);CHKERRQ(ierr); 2670 ierr = PetscFree(garray);CHKERRQ(ierr); 2671 ierr = ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);CHKERRQ(ierr); 2672 2673 /* create temporary global vector to generate scatter context */ 2674 ierr = VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);CHKERRQ(ierr); 2675 ierr = VecSetBlockSize(gvec,dof);CHKERRQ(ierr); 2676 2677 /* generate the scatter context */ 2678 ierr = VecScatterCreate(gvec,from,b->w,to,&b->ctx);CHKERRQ(ierr); 2679 2680 ierr = ISDestroy(from);CHKERRQ(ierr); 2681 ierr = ISDestroy(to);CHKERRQ(ierr); 2682 ierr = VecDestroy(gvec);CHKERRQ(ierr); 2683 2684 B->ops->mult = MatMult_MPIMAIJ_dof; 2685 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 2686 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 2687 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 2688 B->ops->ptapsymbolic_mpiaij = MatPtAPSymbolic_MPIAIJ_MPIMAIJ; 2689 B->ops->ptapnumeric_mpiaij = MatPtAPNumeric_MPIAIJ_MPIMAIJ; 2690 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);CHKERRQ(ierr); 2691 } 2692 *maij = B; 2693 ierr = MatView_Private(B);CHKERRQ(ierr); 2694 } 2695 PetscFunctionReturn(0); 2696 } 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709