1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 //#if !defined(PETSC_USE_COMPLEX) 41 42 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 43 //#endif 44 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 45 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 46 if (!fftw->p_forward){ /* create a plan, then excute it */ 47 switch (ndim){ 48 case 1: 49 #if defined(PETSC_USE_COMPLEX) 50 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 51 #else 52 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 53 #endif 54 break; 55 case 2: 56 #if defined(PETSC_USE_COMPLEX) 57 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 #else 59 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 60 #endif 61 break; 62 case 3: 63 #if defined(PETSC_USE_COMPLEX) 64 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 65 #else 66 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 67 #endif 68 break; 69 default: 70 #if defined(PETSC_USE_COMPLEX) 71 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 72 #else 73 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 74 #endif 75 break; 76 } 77 fftw->finarray = x_array; 78 fftw->foutarray = y_array; 79 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 80 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 81 fftw_execute(fftw->p_forward); 82 } else { /* use existing plan */ 83 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 84 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 85 } else { 86 fftw_execute(fftw->p_forward); 87 } 88 } 89 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 90 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 91 PetscFunctionReturn(0); 92 } 93 94 #undef __FUNCT__ 95 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 96 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 97 { 98 PetscErrorCode ierr; 99 Mat_FFT *fft = (Mat_FFT*)A->data; 100 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 101 PetscScalar *x_array,*y_array; 102 PetscInt ndim=fft->ndim,*dim=fft->dim; 103 104 PetscFunctionBegin; 105 //#if !defined(PETSC_USE_COMPLEX) 106 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 107 //#endif 108 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 109 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 110 if (!fftw->p_backward){ /* create a plan, then excute it */ 111 switch (ndim){ 112 case 1: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 2: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 case 3: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 default: 134 #if defined(PETSC_USE_COMPLEX) 135 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 136 #else 137 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 138 #endif 139 break; 140 } 141 fftw->binarray = x_array; 142 fftw->boutarray = y_array; 143 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 144 } else { /* use existing plan */ 145 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 146 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 147 } else { 148 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 149 } 150 } 151 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 152 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 153 PetscFunctionReturn(0); 154 } 155 156 #undef __FUNCT__ 157 #define __FUNCT__ "MatMult_MPIFFTW" 158 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 159 { 160 PetscErrorCode ierr; 161 Mat_FFT *fft = (Mat_FFT*)A->data; 162 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 163 PetscScalar *x_array,*y_array; 164 PetscInt ndim=fft->ndim,*dim=fft->dim; 165 MPI_Comm comm=((PetscObject)A)->comm; 166 // PetscInt ctr; 167 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 168 // ndim1=(ptrdiff_t) ndim; 169 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 170 171 // for(ctr=0;ctr<ndim;ctr++) 172 // { 173 // pdim[ctr] = dim[ctr]; 174 // } 175 176 PetscFunctionBegin; 177 //#if !defined(PETSC_USE_COMPLEX) 178 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 179 //#endif 180 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 181 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 182 183 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 184 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 185 if (!fftw->p_forward){ /* create a plan, then excute it */ 186 switch (ndim){ 187 case 1: 188 #if defined(PETSC_USE_COMPLEX) 189 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 190 #endif 191 break; 192 case 2: 193 #if defined(PETSC_USE_COMPLEX) 194 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 195 #else 196 printf("The code comes here \n"); 197 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 198 #endif 199 break; 200 case 3: 201 #if defined(PETSC_USE_COMPLEX) 202 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 203 #else 204 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 205 #endif 206 break; 207 default: 208 #if defined(PETSC_USE_COMPLEX) 209 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 210 #else 211 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 212 #endif 213 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 214 break; 215 } 216 fftw->finarray = x_array; 217 fftw->foutarray = y_array; 218 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 219 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 220 fftw_execute(fftw->p_forward); 221 } else { /* use existing plan */ 222 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 223 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 224 } else { 225 fftw_execute(fftw->p_forward); 226 } 227 } 228 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 229 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 230 PetscFunctionReturn(0); 231 } 232 233 #undef __FUNCT__ 234 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 235 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 236 { 237 PetscErrorCode ierr; 238 Mat_FFT *fft = (Mat_FFT*)A->data; 239 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 240 PetscScalar *x_array,*y_array; 241 PetscInt ndim=fft->ndim,*dim=fft->dim; 242 MPI_Comm comm=((PetscObject)A)->comm; 243 // PetscInt ctr; 244 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 245 // ndim1=(ptrdiff_t) ndim; 246 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 247 248 // for(ctr=0;ctr<ndim;ctr++) 249 // { 250 // pdim[ctr] = dim[ctr]; 251 // } 252 253 PetscFunctionBegin; 254 //#if !defined(PETSC_USE_COMPLEX) 255 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 256 //#endif 257 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 258 // should pdim be a member of Mat_FFTW? 259 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 260 261 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 262 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 263 if (!fftw->p_backward){ /* create a plan, then excute it */ 264 switch (ndim){ 265 case 1: 266 #if defined(PETSC_USE_COMPLEX) 267 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 268 #endif 269 break; 270 case 2: 271 #if defined(PETSC_USE_COMPLEX) 272 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 273 #else 274 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 275 #endif 276 break; 277 case 3: 278 #if defined(PETSC_USE_COMPLEX) 279 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 280 #else 281 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 282 #endif 283 break; 284 default: 285 #if defined(PETSC_USE_COMPLEX) 286 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 287 #else 288 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 289 #endif 290 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 291 break; 292 } 293 fftw->binarray = x_array; 294 fftw->boutarray = y_array; 295 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296 } else { /* use existing plan */ 297 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 298 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 299 } else { 300 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 301 } 302 } 303 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 304 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 305 PetscFunctionReturn(0); 306 } 307 308 #undef __FUNCT__ 309 #define __FUNCT__ "MatDestroy_FFTW" 310 PetscErrorCode MatDestroy_FFTW(Mat A) 311 { 312 Mat_FFT *fft = (Mat_FFT*)A->data; 313 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 //#if !defined(PETSC_USE_COMPLEX) 318 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 319 //#endif 320 fftw_destroy_plan(fftw->p_forward); 321 fftw_destroy_plan(fftw->p_backward); 322 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 323 ierr = PetscFree(fft->data);CHKERRQ(ierr); 324 PetscFunctionReturn(0); 325 } 326 327 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 328 #undef __FUNCT__ 329 #define __FUNCT__ "VecDestroy_MPIFFTW" 330 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 331 { 332 PetscErrorCode ierr; 333 PetscScalar *array; 334 335 PetscFunctionBegin; 336 //#if !defined(PETSC_USE_COMPLEX) 337 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 338 //#endif 339 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 340 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 341 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 342 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 343 PetscFunctionReturn(0); 344 } 345 346 #undef __FUNCT__ 347 #define __FUNCT__ "MatGetVecs1DC_FFTW" 348 /* 349 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 350 parallel layout determined by FFTW-1D 351 352 */ 353 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bout) 354 { 355 PetscErrorCode ierr; 356 PetscMPIInt size,rank; 357 MPI_Comm comm=((PetscObject)A)->comm; 358 Mat_FFT *fft = (Mat_FFT*)A->data; 359 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360 PetscInt N=fft->N; 361 PetscInt ndim=fft->ndim,*dim=fft->dim; 362 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 363 ptrdiff_t f_local_n1,f_local_1_end; 364 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 365 ptrdiff_t b_local_n1,b_local_1_end; 366 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 367 368 PetscFunctionBegin; 369 #if !defined(PETSC_USE_COMPLEX) 370 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 371 #endif 372 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 373 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 374 if (size == 1){ 375 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 376 } 377 else { 378 if (ndim>1){ 379 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 380 else { 381 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 382 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 383 if (fin) { 384 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 385 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 386 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 387 } 388 if (fout) { 389 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 390 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 391 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 392 } 393 if (bout) { 394 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 395 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 396 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 397 } 398 } 399 if (fin){ 400 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 401 } 402 if (fout){ 403 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 404 } 405 if (bout){ 406 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 407 } 408 PetscFunctionReturn(0); 409 } 410 411 412 } 413 414 #undef __FUNCT__ 415 #define __FUNCT__ "MatGetVecs_FFTW" 416 /* 417 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 418 parallel layout determined by FFTW 419 420 Collective on Mat 421 422 Input Parameter: 423 . mat - the matrix 424 425 Output Parameter: 426 + fin - (optional) input vector of forward FFTW 427 - fout - (optional) output vector of forward FFTW 428 429 Level: advanced 430 431 .seealso: MatCreateFFTW() 432 */ 433 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 434 { 435 PetscErrorCode ierr; 436 PetscMPIInt size,rank; 437 MPI_Comm comm=((PetscObject)A)->comm; 438 Mat_FFT *fft = (Mat_FFT*)A->data; 439 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 440 PetscInt N=fft->N, N1, n1,vsize; 441 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 442 443 PetscFunctionBegin; 444 //#if !defined(PETSC_USE_COMPLEX) 445 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 446 //#endif 447 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 448 PetscValidType(A,1); 449 450 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 451 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 452 if (size == 1){ /* sequential case */ 453 #if defined(PETSC_USE_COMPLEX) 454 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 455 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 456 #else 457 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 458 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 459 printf("The code successfully comes at the end of the routine with one processor\n"); 460 #endif 461 } else { /* mpi case */ 462 ptrdiff_t alloc_local,local_n0,local_0_start; 463 ptrdiff_t local_n1,local_1_end; 464 fftw_complex *data_fin,*data_fout; 465 double *data_finr ; 466 ptrdiff_t local_1_start,temp; 467 // PetscInt ctr; 468 // ptrdiff_t ndim1,*pdim; 469 // ndim1=(ptrdiff_t) ndim; 470 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 471 472 // for(ctr=0;ctr<ndim;ctr++) 473 // {k 474 // pdim[ctr] = dim[ctr]; 475 // } 476 477 478 479 switch (ndim){ 480 case 1: 481 /* Get local size */ 482 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 483 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 484 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 485 if (fin) { 486 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 487 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 488 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 489 } 490 if (fout) { 491 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 492 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 493 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 494 } 495 break; 496 case 2: 497 #if !defined(PETSC_USE_COMPLEX) 498 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 499 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 500 if (fin) { 501 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 502 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 503 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 504 //printf("The code comes here with vector size %d\n",vsize); 505 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 if (fout) { 508 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 509 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 510 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 511 } 512 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 513 514 #else 515 /* Get local size */ 516 printf("Hope this does not come here"); 517 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 518 if (fin) { 519 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 520 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 521 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 522 } 523 if (fout) { 524 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 525 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 526 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 527 } 528 printf("Hope this does not come here"); 529 #endif 530 break; 531 case 3: 532 /* Get local size */ 533 #if !defined(PETSC_USE_COMPLEX) 534 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 535 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 536 if (fin) { 537 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 538 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 539 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 540 //printf("The code comes here with vector size %d\n",vsize); 541 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 542 } 543 if (fout) { 544 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 545 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 546 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 547 } 548 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 549 550 551 #else 552 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 553 // printf("The quantity n is %d",n); 554 if (fin) { 555 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 556 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 557 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 558 } 559 if (fout) { 560 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 561 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 562 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 563 } 564 #endif 565 break; 566 default: 567 /* Get local size */ 568 #if !defined(PETSC_USE_COMPLEX) 569 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 570 printf("The value of temp is %ld\n",temp); 571 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 572 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 573 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 574 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 575 if (fin) { 576 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 577 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 578 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 579 //printf("The code comes here with vector size %d\n",vsize); 580 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 if (fout) { 583 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 584 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 585 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 586 } 587 588 #else 589 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 590 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 591 // printf("The value of alloc local is %d",alloc_local); 592 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 593 // for(i=0;i<ndim;i++) 594 // { 595 // pdim[i]=dim[i];printf("%d",pdim[i]); 596 // } 597 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 598 // printf("The quantity n is %d",n); 599 if (fin) { 600 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 601 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 602 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 603 } 604 if (fout) { 605 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 606 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 607 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 #endif 610 break; 611 } 612 } 613 // if (fin){ 614 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 615 // } 616 // if (fout){ 617 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 618 // } 619 PetscFunctionReturn(0); 620 } 621 622 #undef __FUNCT__ 623 #define __FUNCT__ "InputTransformFFT" 624 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 625 { 626 PetscErrorCode ierr; 627 PetscFunctionBegin; 628 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 629 PetscFunctionReturn(0); 630 } 631 632 /* 633 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 634 Input A, x, y 635 A - FFTW matrix 636 x - user data 637 Options Database Keys: 638 + -mat_fftw_plannerflags - set FFTW planner flags 639 640 Level: intermediate 641 642 */ 643 644 EXTERN_C_BEGIN 645 #undef __FUNCT__ 646 #define __FUNCT__ "InputTransformFFT_FTTW" 647 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 648 { 649 PetscErrorCode ierr; 650 MPI_Comm comm=((PetscObject)A)->comm; 651 Mat_FFT *fft = (Mat_FFT*)A->data; 652 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 653 PetscInt N=fft->N, N1, n1 ,NM; 654 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 655 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 656 PetscInt i,j,k,rank,size,partial_dim; 657 ptrdiff_t alloc_local,local_n0,local_0_start; 658 ptrdiff_t local_n1,local_1_start,temp; 659 VecScatter vecscat; 660 IS list1,list2; 661 662 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 663 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 664 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 665 printf("Local ownership starts at %d\n",low); 666 667 if (size==1) 668 { 669 switch (ndim){ 670 case 1: 671 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 672 for (i=0;i<dim[0];i++) 673 { 674 indx1[i] = i; 675 } 676 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 677 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 678 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 679 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 680 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 681 ierr = ISDestroy(&list1);CHKERRQ(ierr); 682 ierr = PetscFree(indx1);CHKERRQ(ierr); 683 break; 684 685 case 2: 686 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 687 for (i=0;i<dim[0];i++){ 688 for (j=0;j<dim[1];j++){ 689 indx1[i*dim[1]+j] = i*dim[1] + j; 690 } 691 } 692 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 693 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 694 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 697 ierr = ISDestroy(&list1);CHKERRQ(ierr); 698 ierr = PetscFree(indx1);CHKERRQ(ierr); 699 break; 700 case 3: 701 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 702 for (i=0;i<dim[0];i++){ 703 for (j=0;j<dim[1];j++){ 704 for (k=0;k<dim[2];k++){ 705 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 706 } 707 } 708 } 709 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 710 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 711 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 713 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 714 ierr = ISDestroy(&list1);CHKERRQ(ierr); 715 ierr = PetscFree(indx1);CHKERRQ(ierr); 716 break; 717 default: 718 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1); 719 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 720 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 721 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 722 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 723 ierr = ISDestroy(&list1);CHKERRQ(ierr); 724 //ierr = ISDestroy(list1);CHKERRQ(ierr); 725 break; 726 } 727 } 728 729 else{ 730 731 switch (ndim){ 732 case 1: 733 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 734 break; 735 case 2: 736 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 737 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 738 739 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 740 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 741 printf("Val local_0_start = %ld\n",local_0_start); 742 743 if (dim[1]%2==0) 744 NM = dim[1]+2; 745 else 746 NM = dim[1]+1; 747 748 for (i=0;i<local_n0;i++){ 749 for (j=0;j<dim[1];j++){ 750 tempindx = i*dim[1] + j; 751 tempindx1 = i*NM + j; 752 indx1[tempindx]=local_0_start*dim[1]+tempindx; 753 indx2[tempindx]=low+tempindx1; 754 // printf("Val tempindx1 = %d\n",tempindx1); 755 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 756 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 757 // printf("-------------------------\n",indx2[tempindx],rank); 758 } 759 } 760 761 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 762 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 763 764 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 765 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 766 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 767 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 768 ierr = ISDestroy(&list1);CHKERRQ(ierr); 769 ierr = ISDestroy(&list2);CHKERRQ(ierr); 770 ierr = PetscFree(indx1);CHKERRQ(ierr); 771 ierr = PetscFree(indx2);CHKERRQ(ierr); 772 break; 773 774 case 3: 775 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 776 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 777 778 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 779 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 780 printf("Val local_0_start = %ld\n",local_0_start); 781 782 if (dim[2]%2==0) 783 NM = dim[2]+2; 784 else 785 NM = dim[2]+1; 786 787 for (i=0;i<local_n0;i++){ 788 for (j=0;j<dim[1];j++){ 789 for (k=0;k<dim[2];k++){ 790 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 791 tempindx1 = i*dim[1]*NM + j*NM + k; 792 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 793 indx2[tempindx]=low+tempindx1; 794 } 795 // printf("Val tempindx1 = %d\n",tempindx1); 796 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 797 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 798 // printf("-------------------------\n",indx2[tempindx],rank); 799 } 800 } 801 802 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 803 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 804 805 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 806 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 808 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 809 ierr = ISDestroy(&list1);CHKERRQ(ierr); 810 ierr = ISDestroy(&list2);CHKERRQ(ierr); 811 ierr = PetscFree(indx1);CHKERRQ(ierr); 812 ierr = PetscFree(indx2);CHKERRQ(ierr); 813 break; 814 815 default: 816 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 817 printf("The value of temp is %ld\n",temp); 818 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 819 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 820 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 821 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 822 823 partial_dim = fftw->partial_dim; 824 printf("The value of partial dim is %d\n",partial_dim); 825 826 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 827 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 828 printf("Val local_0_start = %ld\n",local_0_start); 829 830 if (dim[ndim-1]%2==0) 831 NM = 2; 832 else 833 NM = 1; 834 835 j = low; 836 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 837 { 838 indx1[i] = local_0_start*partial_dim + i; 839 indx2[i] = j; 840 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 841 if (k%dim[ndim-1]==0) 842 { j+=NM;} 843 j++; 844 } 845 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 846 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 847 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 848 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 849 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 850 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 851 ierr = ISDestroy(&list1);CHKERRQ(ierr); 852 ierr = ISDestroy(&list2);CHKERRQ(ierr); 853 ierr = PetscFree(indx1);CHKERRQ(ierr); 854 ierr = PetscFree(indx2);CHKERRQ(ierr); 855 break; 856 } 857 } 858 859 return 0; 860 } 861 EXTERN_C_END 862 863 #undef __FUNCT__ 864 #define __FUNCT__ "OutputTransformFFT" 865 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 866 { 867 PetscErrorCode ierr; 868 PetscFunctionBegin; 869 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 870 PetscFunctionReturn(0); 871 } 872 873 /* 874 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 875 Input A, x, y 876 A - FFTW matrix 877 x - FFTW vector 878 y - PETSc vector that user can read 879 Options Database Keys: 880 + -mat_fftw_plannerflags - set FFTW planner flags 881 882 Level: intermediate 883 884 */ 885 886 EXTERN_C_BEGIN 887 #undef __FUNCT__ 888 #define __FUNCT__ "OutputTransformFFT_FTTW" 889 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 890 { 891 PetscErrorCode ierr; 892 MPI_Comm comm=((PetscObject)A)->comm; 893 Mat_FFT *fft = (Mat_FFT*)A->data; 894 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 895 PetscInt N=fft->N, N1, n1 ,NM; 896 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 897 PetscInt low, *indx1, *indx2, tempindx, tempindx1; 898 PetscInt i,j,k,rank,size,partial_dim; 899 ptrdiff_t alloc_local,local_n0,local_0_start; 900 ptrdiff_t local_n1,local_1_start,temp; 901 VecScatter vecscat; 902 IS list1,list2; 903 904 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 905 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 906 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 907 printf("Local ownership starts at %d\n",low); 908 909 if (size==1){ 910 switch (ndim){ 911 case 1: 912 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 913 for (i=0;i<dim[0];i++) 914 { 915 indx1[i] = i; 916 } 917 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 918 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 919 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 922 ierr = ISDestroy(&list1);CHKERRQ(ierr); 923 ierr = PetscFree(indx1);CHKERRQ(ierr); 924 break; 925 926 case 2: 927 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 928 for (i=0;i<dim[0];i++){ 929 for (j=0;j<dim[1];j++){ 930 indx1[i*dim[1]+j] = i*dim[1] + j; 931 } 932 } 933 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 934 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 935 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 937 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 938 ierr = ISDestroy(&list1);CHKERRQ(ierr); 939 ierr = PetscFree(indx1);CHKERRQ(ierr); 940 break; 941 case 3: 942 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 943 for (i=0;i<dim[0];i++){ 944 for (j=0;j<dim[1];j++){ 945 for (k=0;k<dim[2];k++){ 946 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 947 } 948 } 949 } 950 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 951 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 952 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 953 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 955 ierr = ISDestroy(&list1);CHKERRQ(ierr); 956 ierr = PetscFree(indx1);CHKERRQ(ierr); 957 break; 958 default: 959 ierr = ISCreateStride(comm,N,0,1,&list1); 960 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 961 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 962 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 963 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 964 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 965 ierr = ISDestroy(&list1);CHKERRQ(ierr); 966 break; 967 } 968 } 969 else{ 970 971 switch (ndim){ 972 case 1: 973 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 974 break; 975 case 2: 976 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 977 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 978 979 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 980 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 981 printf("Val local_0_start = %ld\n",local_0_start); 982 983 if (dim[1]%2==0) 984 NM = dim[1]+2; 985 else 986 NM = dim[1]+1; 987 988 989 990 for (i=0;i<local_n0;i++){ 991 for (j=0;j<dim[1];j++){ 992 tempindx = i*dim[1] + j; 993 tempindx1 = i*NM + j; 994 indx1[tempindx]=local_0_start*dim[1]+tempindx; 995 indx2[tempindx]=low+tempindx1; 996 // printf("Val tempindx1 = %d\n",tempindx1); 997 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 998 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 999 // printf("-------------------------\n",indx2[tempindx],rank); 1000 } 1001 } 1002 1003 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1004 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1005 1006 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1007 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1008 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1009 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1010 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1011 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1012 ierr = PetscFree(indx1);CHKERRQ(ierr); 1013 ierr = PetscFree(indx2);CHKERRQ(ierr); 1014 break; 1015 1016 case 3: 1017 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1018 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1019 1020 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1021 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1022 printf("Val local_0_start = %ld\n",local_0_start); 1023 1024 if (dim[2]%2==0) 1025 NM = dim[2]+2; 1026 else 1027 NM = dim[2]+1; 1028 1029 for (i=0;i<local_n0;i++){ 1030 for (j=0;j<dim[1];j++){ 1031 for (k=0;k<dim[2];k++){ 1032 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1033 tempindx1 = i*dim[1]*NM + j*NM + k; 1034 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1035 indx2[tempindx]=low+tempindx1; 1036 } 1037 // printf("Val tempindx1 = %d\n",tempindx1); 1038 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1039 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1040 // printf("-------------------------\n",indx2[tempindx],rank); 1041 } 1042 } 1043 1044 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1045 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1046 1047 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1048 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1049 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1050 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1051 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1052 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1053 ierr = PetscFree(indx1);CHKERRQ(ierr); 1054 ierr = PetscFree(indx2);CHKERRQ(ierr); 1055 break; 1056 1057 default: 1058 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1059 printf("The value of temp is %ld\n",temp); 1060 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1061 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1062 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1063 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1064 1065 partial_dim = fftw->partial_dim; 1066 printf("The value of partial dim is %d\n",partial_dim); 1067 1068 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1069 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1070 printf("Val local_0_start = %ld\n",local_0_start); 1071 1072 if (dim[ndim-1]%2==0) 1073 NM = 2; 1074 else 1075 NM = 1; 1076 1077 j = low; 1078 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1079 { 1080 indx1[i] = local_0_start*partial_dim + i; 1081 indx2[i] = j; 1082 //printf("The values are %d and %d from %d\n",indx1[i],indx2[i],rank); 1083 if (k%dim[ndim-1]==0) 1084 { j+=NM;} 1085 j++; 1086 } 1087 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1088 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1089 1090 //ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1091 1092 1093 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1094 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1095 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1096 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1097 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1098 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1099 ierr = PetscFree(indx1);CHKERRQ(ierr); 1100 ierr = PetscFree(indx2);CHKERRQ(ierr); 1101 1102 break; 1103 } 1104 } 1105 return 0; 1106 } 1107 EXTERN_C_END 1108 1109 EXTERN_C_BEGIN 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatCreate_FFTW" 1112 /* 1113 MatCreate_FFTW - Creates a matrix object that provides FFT 1114 via the external package FFTW 1115 Options Database Keys: 1116 + -mat_fftw_plannerflags - set FFTW planner flags 1117 1118 Level: intermediate 1119 1120 */ 1121 1122 PetscErrorCode MatCreate_FFTW(Mat A) 1123 { 1124 PetscErrorCode ierr; 1125 MPI_Comm comm=((PetscObject)A)->comm; 1126 Mat_FFT *fft=(Mat_FFT*)A->data; 1127 Mat_FFTW *fftw; 1128 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1129 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1130 PetscBool flg; 1131 PetscInt p_flag,partial_dim=1,ctr,N1; 1132 PetscMPIInt size,rank; 1133 ptrdiff_t *pdim, temp; 1134 ptrdiff_t local_n1,local_1_start; 1135 1136 PetscFunctionBegin; 1137 //#if !defined(PETSC_USE_COMPLEX) 1138 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 1139 //#endif 1140 1141 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1142 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1143 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1144 1145 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1146 pdim[0] = dim[0]; 1147 for(ctr=1;ctr<ndim;ctr++) 1148 { 1149 partial_dim *= dim[ctr]; 1150 pdim[ctr] = dim[ctr]; 1151 } 1152 //#if !defined(PETSC_USE_COMPLEX) 1153 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 1154 //#endif 1155 1156 // printf("partial dimension is %d",partial_dim); 1157 if (size == 1) { 1158 #if defined(PETSC_USE_COMPLEX) 1159 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1160 n = N; 1161 #else 1162 int tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1163 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1164 n = tot_dim; 1165 #endif 1166 1167 } else { 1168 ptrdiff_t alloc_local,local_n0,local_0_start;//local_n1,local_1_end; 1169 switch (ndim){ 1170 case 1: 1171 #if !defined(PETSC_USE_COMPLEX) 1172 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1173 #else 1174 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 1175 n = (PetscInt)local_n0; 1176 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1177 #endif 1178 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 1179 break; 1180 case 2: 1181 #if defined(PETSC_USE_COMPLEX) 1182 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1183 /* 1184 PetscMPIInt rank; 1185 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1186 PetscSynchronizedFlush(comm); 1187 */ 1188 n = (PetscInt)local_n0*dim[1]; 1189 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1190 #else 1191 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1192 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1193 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1194 #endif 1195 break; 1196 case 3: 1197 // printf("The value of alloc local is %d",alloc_local); 1198 #if defined(PETSC_USE_COMPLEX) 1199 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1200 n = (PetscInt)local_n0*dim[1]*dim[2]; 1201 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1202 #else 1203 printf("The code comes here\n"); 1204 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1205 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1206 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1207 #endif 1208 break; 1209 default: 1210 #if defined(PETSC_USE_COMPLEX) 1211 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1212 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 1213 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 1214 n = (PetscInt)local_n0*partial_dim; 1215 // printf("New partial dimension is %d %d %d",n,N,ndim); 1216 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1217 #else 1218 temp = pdim[ndim-1]; 1219 pdim[ndim-1]= temp/2 + 1; 1220 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 1221 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1222 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1223 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1224 pdim[ndim-1] = temp; 1225 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 1226 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1227 #endif 1228 break; 1229 } 1230 } 1231 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1232 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1233 fft->data = (void*)fftw; 1234 1235 fft->n = n; 1236 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1237 fftw->partial_dim = partial_dim; 1238 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1239 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1240 1241 fftw->p_forward = 0; 1242 fftw->p_backward = 0; 1243 fftw->p_flag = FFTW_ESTIMATE; 1244 1245 if (size == 1){ 1246 A->ops->mult = MatMult_SeqFFTW; 1247 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1248 } else { 1249 A->ops->mult = MatMult_MPIFFTW; 1250 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1251 } 1252 fft->matdestroy = MatDestroy_FFTW; 1253 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1254 A->ops->getvecs = MatGetVecs_FFTW; 1255 A->assembled = PETSC_TRUE; 1256 #if !defined(PETSC_USE_COMPLEX) 1257 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1258 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1259 #endif 1260 1261 /* get runtime options */ 1262 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1263 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1264 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1265 PetscOptionsEnd(); 1266 PetscFunctionReturn(0); 1267 } 1268 EXTERN_C_END 1269 1270 1271 1272 1273