1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_plan p_forward,p_backward; 15 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 16 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 17 executed for the arrays with which the plan was created */ 18 } Mat_FFTW; 19 20 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 21 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatDestroy_FFTW(Mat); 25 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 26 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); 27 28 #undef __FUNCT__ 29 #define __FUNCT__ "MatMult_SeqFFTW" 30 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 31 { 32 PetscErrorCode ierr; 33 Mat_FFT *fft = (Mat_FFT*)A->data; 34 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 35 PetscScalar *x_array,*y_array; 36 PetscInt ndim=fft->ndim,*dim=fft->dim; 37 38 PetscFunctionBegin; 39 #if !defined(PETSC_USE_COMPLEX) 40 41 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 42 #endif 43 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 44 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 45 if (!fftw->p_forward){ /* create a plan, then excute it */ 46 switch (ndim){ 47 case 1: 48 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 49 break; 50 case 2: 51 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 52 break; 53 case 3: 54 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 55 break; 56 default: 57 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 58 break; 59 } 60 fftw->finarray = x_array; 61 fftw->foutarray = y_array; 62 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 63 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 64 fftw_execute(fftw->p_forward); 65 } else { /* use existing plan */ 66 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 67 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 68 } else { 69 fftw_execute(fftw->p_forward); 70 } 71 } 72 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 73 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 74 PetscFunctionReturn(0); 75 } 76 77 #undef __FUNCT__ 78 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 79 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 80 { 81 PetscErrorCode ierr; 82 Mat_FFT *fft = (Mat_FFT*)A->data; 83 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 84 PetscScalar *x_array,*y_array; 85 PetscInt ndim=fft->ndim,*dim=fft->dim; 86 87 PetscFunctionBegin; 88 #if !defined(PETSC_USE_COMPLEX) 89 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 90 #endif 91 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 92 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 93 if (!fftw->p_backward){ /* create a plan, then excute it */ 94 switch (ndim){ 95 case 1: 96 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 97 break; 98 case 2: 99 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 100 break; 101 case 3: 102 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 103 break; 104 default: 105 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 106 break; 107 } 108 fftw->binarray = x_array; 109 fftw->boutarray = y_array; 110 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 111 } else { /* use existing plan */ 112 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 113 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 114 } else { 115 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 116 } 117 } 118 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 119 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 120 PetscFunctionReturn(0); 121 } 122 123 #undef __FUNCT__ 124 #define __FUNCT__ "MatMult_MPIFFTW" 125 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 126 { 127 PetscErrorCode ierr; 128 Mat_FFT *fft = (Mat_FFT*)A->data; 129 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 130 PetscScalar *x_array,*y_array; 131 PetscInt ndim=fft->ndim,*dim=fft->dim; 132 MPI_Comm comm=((PetscObject)A)->comm; 133 // PetscInt ctr; 134 // ptrdiff_t ndim1=(ptrdiff_t) ndim,*pdim; 135 // ndim1=(ptrdiff_t) ndim; 136 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 137 138 // for(ctr=0;ctr<ndim;ctr++) 139 // { 140 // pdim[ctr] = dim[ctr]; 141 // } 142 143 PetscFunctionBegin; 144 //#if !defined(PETSC_USE_COMPLEX) 145 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 146 //#endif 147 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 148 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 149 150 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 151 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 152 if (!fftw->p_forward){ /* create a plan, then excute it */ 153 switch (ndim){ 154 case 1: 155 #if defined(PETSC_USE_COMPLEX) 156 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 157 #endif 158 break; 159 case 2: 160 #if defined(PETSC_USE_COMPLEX) 161 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 162 #else 163 printf("The code comes here \n"); 164 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 165 #endif 166 break; 167 case 3: 168 #if defined(PETSC_USE_COMPLEX) 169 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 170 #else 171 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 172 #endif 173 break; 174 default: 175 #if defined(PETSC_USE_COMPLEX) 176 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 177 #else 178 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 179 #endif 180 // fftw->p_forward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 181 break; 182 } 183 fftw->finarray = x_array; 184 fftw->foutarray = y_array; 185 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 186 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 187 fftw_execute(fftw->p_forward); 188 } else { /* use existing plan */ 189 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 190 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 191 } else { 192 fftw_execute(fftw->p_forward); 193 } 194 } 195 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 196 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 197 PetscFunctionReturn(0); 198 } 199 200 #undef __FUNCT__ 201 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 202 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 203 { 204 PetscErrorCode ierr; 205 Mat_FFT *fft = (Mat_FFT*)A->data; 206 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 207 PetscScalar *x_array,*y_array; 208 PetscInt ndim=fft->ndim,*dim=fft->dim; 209 MPI_Comm comm=((PetscObject)A)->comm; 210 // PetscInt ctr; 211 // ptrdiff_t ndim1=(ptrdiff_t)ndim,*pdim; 212 // ndim1=(ptrdiff_t) ndim; 213 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 214 215 // for(ctr=0;ctr<ndim;ctr++) 216 // { 217 // pdim[ctr] = dim[ctr]; 218 // } 219 220 PetscFunctionBegin; 221 //#if !defined(PETSC_USE_COMPLEX) 222 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 223 //#endif 224 // ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&pdim);CHKERRQ(ierr); 225 // should pdim be a member of Mat_FFTW? 226 // for (ctr=0; ctr<ndim; ctr++) pdim[ctr] = dim[ctr]; 227 228 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 229 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 230 if (!fftw->p_backward){ /* create a plan, then excute it */ 231 switch (ndim){ 232 case 1: 233 #if defined(PETSC_USE_COMPLEX) 234 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 235 #endif 236 break; 237 case 2: 238 #if defined(PETSC_USE_COMPLEX) 239 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 240 #else 241 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 242 #endif 243 break; 244 case 3: 245 #if defined(PETSC_USE_COMPLEX) 246 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 247 #else 248 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 249 #endif 250 break; 251 default: 252 #if defined(PETSC_USE_COMPLEX) 253 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 254 #else 255 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 256 #endif 257 // fftw->p_backward = fftw_mpi_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 258 break; 259 } 260 fftw->binarray = x_array; 261 fftw->boutarray = y_array; 262 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 263 } else { /* use existing plan */ 264 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 265 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 266 } else { 267 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 268 } 269 } 270 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 271 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 #undef __FUNCT__ 276 #define __FUNCT__ "MatDestroy_FFTW" 277 PetscErrorCode MatDestroy_FFTW(Mat A) 278 { 279 Mat_FFT *fft = (Mat_FFT*)A->data; 280 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 281 PetscErrorCode ierr; 282 283 PetscFunctionBegin; 284 #if !defined(PETSC_USE_COMPLEX) 285 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 286 #endif 287 fftw_destroy_plan(fftw->p_forward); 288 fftw_destroy_plan(fftw->p_backward); 289 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 290 ierr = PetscFree(fft->data);CHKERRQ(ierr); 291 PetscFunctionReturn(0); 292 } 293 294 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 295 #undef __FUNCT__ 296 #define __FUNCT__ "VecDestroy_MPIFFTW" 297 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 298 { 299 PetscErrorCode ierr; 300 PetscScalar *array; 301 302 PetscFunctionBegin; 303 #if !defined(PETSC_USE_COMPLEX) 304 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 305 #endif 306 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 307 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 309 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 #undef __FUNCT__ 314 #define __FUNCT__ "MatGetVecs1DC_FFTW" 315 /* 316 MatGetVecs_FFTW1D - Get Vectors(s) compatible with matrix, i.e. with the 317 parallel layout determined by FFTW-1D 318 319 */ 320 PetscErrorCode MatGetVecs_FFTW1D(Mat A,Vec *fin,Vec *fout,Vec *bin,Vec *bout) 321 { 322 PetscErrorCode ierr; 323 PetscMPIInt size,rank; 324 MPI_Comm comm=((PetscObject)A)->comm; 325 Mat_FFT *fft = (Mat_FFT*)A->data; 326 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 327 PetscInt N=fft->N; 328 PetscInt ndim=fft->ndim,*dim=fft->dim; 329 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 330 ptrdiff_t f_local_n1,f_local_1_end; 331 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 332 ptrdiff_t b_local_n1,b_local_1_end; 333 fftw_complex *data_fin,*data_fout,*data_bin,*data_bout; 334 335 PetscFunctionBegin; 336 #if !defined(PETSC_USE_COMPLEX) 337 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 338 #endif 339 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 340 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 341 if (size == 1){ 342 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 343 } 344 else { 345 if (ndim>1){ 346 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 347 else { 348 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 349 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 350 if (fin) { 351 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 352 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 353 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 354 } 355 if (fout) { 356 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 357 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 358 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 359 } 360 if (bin) { 361 data_bin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 362 ierr = VecCreateMPIWithArray(comm,b_local_n0,N,(const PetscScalar*)data_bin,bin);CHKERRQ(ierr); 363 (*bin)->ops->destroy = VecDestroy_MPIFFTW; 364 } 365 if (bout) { 366 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 367 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 368 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 369 } 370 } 371 if (fin){ 372 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 373 } 374 if (fout){ 375 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 376 } 377 if (bin){ 378 ierr = PetscLayoutReference(A->rmap,&(*bin)->map);CHKERRQ(ierr); 379 } 380 if (bout){ 381 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 382 } 383 PetscFunctionReturn(0); 384 } 385 386 387 } 388 389 #undef __FUNCT__ 390 #define __FUNCT__ "MatGetVecs_FFTW" 391 /* 392 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 393 parallel layout determined by FFTW 394 395 Collective on Mat 396 397 Input Parameter: 398 . mat - the matrix 399 400 Output Parameter: 401 + fin - (optional) input vector of forward FFTW 402 - fout - (optional) output vector of forward FFTW 403 404 Level: advanced 405 406 .seealso: MatCreateFFTW() 407 */ 408 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 409 { 410 PetscErrorCode ierr; 411 PetscMPIInt size,rank; 412 MPI_Comm comm=((PetscObject)A)->comm; 413 Mat_FFT *fft = (Mat_FFT*)A->data; 414 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 415 PetscInt N=fft->N, N1, n1,vsize; 416 417 PetscFunctionBegin; 418 //#if !defined(PETSC_USE_COMPLEX) 419 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 420 //#endif 421 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 422 PetscValidType(A,1); 423 424 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 425 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 426 if (size == 1){ /* sequential case */ 427 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 428 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 429 printf("The code successfully comes at the end of the routine with one processor\n"); 430 } else { /* mpi case */ 431 ptrdiff_t alloc_local,local_n0,local_0_start; 432 ptrdiff_t local_n1,local_1_end; 433 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 434 fftw_complex *data_fin,*data_fout; 435 double *data_finr ; 436 ptrdiff_t local_1_start,temp; 437 // PetscInt ctr; 438 // ptrdiff_t ndim1,*pdim; 439 // ndim1=(ptrdiff_t) ndim; 440 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 441 442 // for(ctr=0;ctr<ndim;ctr++) 443 // {k 444 // pdim[ctr] = dim[ctr]; 445 // } 446 447 448 449 switch (ndim){ 450 case 1: 451 /* Get local size */ 452 /* We need to write an error message here saying that one cannot call this routine when doing paralllel 1D complex FFTW */ 453 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 454 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 455 if (fin) { 456 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 457 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 458 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 459 } 460 if (fout) { 461 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 462 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 463 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 464 } 465 break; 466 case 2: 467 #if !defined(PETSC_USE_COMPLEX) 468 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 469 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 470 if (fin) { 471 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 472 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 473 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 474 //printf("The code comes here with vector size %d\n",vsize); 475 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 476 } 477 if (fout) { 478 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 479 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 480 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 481 } 482 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 483 484 #else 485 /* Get local size */ 486 printf("Hope this does not come here"); 487 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 488 if (fin) { 489 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 490 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 491 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 492 } 493 if (fout) { 494 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 495 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 496 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 497 } 498 printf("Hope this does not come here"); 499 #endif 500 break; 501 case 3: 502 /* Get local size */ 503 #if !defined(PETSC_USE_COMPLEX) 504 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 505 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 506 if (fin) { 507 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 508 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 509 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 510 //printf("The code comes here with vector size %d\n",vsize); 511 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 512 } 513 if (fout) { 514 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 515 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 516 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 517 } 518 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 519 520 521 #else 522 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 523 // printf("The quantity n is %d",n); 524 if (fin) { 525 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 527 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 528 } 529 if (fout) { 530 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 531 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 532 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 533 } 534 #endif 535 break; 536 default: 537 /* Get local size */ 538 #if !defined(PETSC_USE_COMPLEX) 539 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 540 printf("The value of temp is %ld\n",temp); 541 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 542 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 543 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 544 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 545 if (fin) { 546 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 547 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 548 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 549 //printf("The code comes here with vector size %d\n",vsize); 550 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 551 } 552 if (fout) { 553 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 554 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 555 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 556 } 557 558 #else 559 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 560 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 561 // printf("The value of alloc local is %d",alloc_local); 562 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 563 // for(i=0;i<ndim;i++) 564 // { 565 // pdim[i]=dim[i];printf("%d",pdim[i]); 566 // } 567 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 568 // printf("The quantity n is %d",n); 569 if (fin) { 570 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 571 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 572 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 573 } 574 if (fout) { 575 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 576 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 577 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 578 } 579 #endif 580 break; 581 } 582 } 583 // if (fin){ 584 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 585 // } 586 // if (fout){ 587 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 588 // } 589 PetscFunctionReturn(0); 590 } 591 592 #undef __FUNCT__ 593 #define __FUNCT__ "InputTransformFFT" 594 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 595 { 596 PetscErrorCode ierr; 597 PetscFunctionBegin; 598 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 599 PetscFunctionReturn(0); 600 } 601 602 /* 603 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block 604 Input A, x, y 605 A - FFTW matrix 606 x - user data 607 Options Database Keys: 608 + -mat_fftw_plannerflags - set FFTW planner flags 609 610 Level: intermediate 611 612 */ 613 614 EXTERN_C_BEGIN 615 #undef __FUNCT__ 616 #define __FUNCT__ "InputTransformFFT_FTTW" 617 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 618 { 619 PetscErrorCode ierr; 620 MPI_Comm comm=((PetscObject)A)->comm; 621 Mat_FFT *fft = (Mat_FFT*)A->data; 622 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 623 PetscInt N=fft->N, N1, n1 ,NM; 624 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 625 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 626 PetscInt i,j,k,rank,size; 627 ptrdiff_t alloc_local,local_n0,local_0_start; 628 ptrdiff_t local_n1,local_1_start; 629 VecScatter vecscat; 630 IS list1,list2; 631 632 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 633 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 634 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 635 printf("Local ownership starts at %d\n",low); 636 637 switch (ndim){ 638 case 1: 639 SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 640 break; 641 case 2: 642 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 643 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 644 645 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 646 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 647 printf("Val local_0_start = %ld\n",local_0_start); 648 649 if (dim[1]%2==0) 650 NM = dim[1]+2; 651 else 652 NM = dim[1]+1; 653 654 for (i=0;i<local_n0;i++){ 655 for (j=0;j<dim[1];j++){ 656 tempindx = i*dim[1] + j; 657 tempindx1 = i*NM + j; 658 indx1[tempindx]=local_0_start*dim[1]+tempindx; 659 indx2[tempindx]=low+tempindx1; 660 // printf("Val tempindx1 = %d\n",tempindx1); 661 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 662 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 663 // printf("-------------------------\n",indx2[tempindx],rank); 664 } 665 } 666 667 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 668 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 669 670 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 671 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 672 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 673 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 674 break; 675 676 case 3: 677 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 678 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 679 680 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 681 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 682 printf("Val local_0_start = %ld\n",local_0_start); 683 684 if (dim[2]%2==0) 685 NM = dim[2]+2; 686 else 687 NM = dim[2]+1; 688 689 for (i=0;i<local_n0;i++){ 690 for (j=0;j<dim[1];j++){ 691 for (k=0;k<dim[2];k++){ 692 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 693 tempindx1 = i*dim[1]*NM + j*NM + k; 694 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 695 indx2[tempindx]=low+tempindx1; 696 } 697 // printf("Val tempindx1 = %d\n",tempindx1); 698 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 699 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 700 // printf("-------------------------\n",indx2[tempindx],rank); 701 } 702 } 703 704 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 705 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 706 707 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 708 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 709 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 710 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 711 break; 712 713 default: 714 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 715 break; 716 } 717 718 return 0; 719 } 720 EXTERN_C_END 721 722 #undef __FUNCT__ 723 #define __FUNCT__ "OutputTransformFFT" 724 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 725 { 726 PetscErrorCode ierr; 727 PetscFunctionBegin; 728 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 729 PetscFunctionReturn(0); 730 } 731 732 /* 733 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 734 Input A, x, y 735 A - FFTW matrix 736 x - FFTW vector 737 y - PETSc vector that user can read 738 Options Database Keys: 739 + -mat_fftw_plannerflags - set FFTW planner flags 740 741 Level: intermediate 742 743 */ 744 745 EXTERN_C_BEGIN 746 #undef __FUNCT__ 747 #define __FUNCT__ "OutputTransformFFT_FTTW" 748 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 749 { 750 PetscErrorCode ierr; 751 MPI_Comm comm=((PetscObject)A)->comm; 752 Mat_FFT *fft = (Mat_FFT*)A->data; 753 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 754 PetscInt N=fft->N, N1, n1 ,NM; 755 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 756 PetscInt low, *indx1, *indx2, tempindx, tempindx1, *indx3, *indx4; 757 PetscInt i,j,k,rank,size; 758 ptrdiff_t alloc_local,local_n0,local_0_start; 759 ptrdiff_t local_n1,local_1_start; 760 VecScatter vecscat; 761 IS list1,list2; 762 763 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 764 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 765 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL); 766 printf("Local ownership starts at %d\n",low); 767 768 switch (ndim){ 769 case 1: 770 SETERRQ(comm,PETSC_ERR_SUP,"Not Supported by FFTW"); 771 break; 772 case 2: 773 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 774 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 775 776 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 777 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 778 printf("Val local_0_start = %ld\n",local_0_start); 779 780 if (dim[1]%2==0) 781 NM = dim[1]+2; 782 else 783 NM = dim[1]+1; 784 785 786 787 for (i=0;i<local_n0;i++){ 788 for (j=0;j<dim[1];j++){ 789 tempindx = i*dim[1] + j; 790 tempindx1 = i*NM + j; 791 indx1[tempindx]=local_0_start*dim[1]+tempindx; 792 indx2[tempindx]=low+tempindx1; 793 // printf("Val tempindx1 = %d\n",tempindx1); 794 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 795 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 796 // printf("-------------------------\n",indx2[tempindx],rank); 797 } 798 } 799 800 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 801 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 802 803 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 804 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 805 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 806 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 807 break; 808 809 case 3: 810 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 811 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 812 813 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 814 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 815 printf("Val local_0_start = %ld\n",local_0_start); 816 817 if (dim[2]%2==0) 818 NM = dim[2]+2; 819 else 820 NM = dim[2]+1; 821 822 for (i=0;i<local_n0;i++){ 823 for (j=0;j<dim[1];j++){ 824 for (k=0;k<dim[2];k++){ 825 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 826 tempindx1 = i*dim[1]*NM + j*NM + k; 827 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 828 indx2[tempindx]=low+tempindx1; 829 } 830 // printf("Val tempindx1 = %d\n",tempindx1); 831 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 832 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 833 // printf("-------------------------\n",indx2[tempindx],rank); 834 } 835 } 836 837 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 838 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 839 840 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 841 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 843 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 844 break; 845 846 default: 847 SETERRQ(comm,PETSC_ERR_SUP,"Not Done Yet"); 848 break; 849 } 850 return 0; 851 } 852 EXTERN_C_END 853 854 EXTERN_C_BEGIN 855 #undef __FUNCT__ 856 #define __FUNCT__ "MatCreate_FFTW" 857 /* 858 MatCreate_FFTW - Creates a matrix object that provides FFT 859 via the external package FFTW 860 Options Database Keys: 861 + -mat_fftw_plannerflags - set FFTW planner flags 862 863 Level: intermediate 864 865 */ 866 867 PetscErrorCode MatCreate_FFTW(Mat A) 868 { 869 PetscErrorCode ierr; 870 MPI_Comm comm=((PetscObject)A)->comm; 871 Mat_FFT *fft=(Mat_FFT*)A->data; 872 Mat_FFTW *fftw; 873 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 874 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 875 PetscBool flg; 876 PetscInt p_flag,partial_dim=1,ctr,N1; 877 PetscMPIInt size,rank; 878 ptrdiff_t *pdim, temp; 879 ptrdiff_t local_n1,local_1_start; 880 881 PetscFunctionBegin; 882 //#if !defined(PETSC_USE_COMPLEX) 883 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 884 //#endif 885 886 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 887 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 888 ierr = MPI_Barrier(PETSC_COMM_WORLD); 889 890 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 891 pdim[0] = dim[0]; 892 for(ctr=1;ctr<ndim;ctr++) 893 { 894 partial_dim *= dim[ctr]; 895 pdim[ctr] = dim[ctr]; 896 } 897 //#if !defined(PETSC_USE_COMPLEX) 898 // SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers"); 899 //#endif 900 901 // printf("partial dimension is %d",partial_dim); 902 if (size == 1) { 903 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 904 n = N; 905 } else { 906 ptrdiff_t alloc_local,local_n0,local_0_start,local_n1,local_1_end; 907 switch (ndim){ 908 case 1: 909 #if !defined(PETSC_USE_COMPLEX) 910 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 911 #endif 912 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 913 n = (PetscInt)local_n0; 914 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 915 // PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecs1DC_C","MatGetVecs1DC_FFTW",MatGetVecs1DC_FFTW); 916 break; 917 case 2: 918 #if defined(PETSC_USE_COMPLEX) 919 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 920 /* 921 PetscMPIInt rank; 922 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 923 PetscSynchronizedFlush(comm); 924 */ 925 n = (PetscInt)local_n0*dim[1]; 926 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 927 #else 928 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 929 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 930 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 931 #endif 932 break; 933 case 3: 934 // printf("The value of alloc local is %d",alloc_local); 935 #if defined(PETSC_USE_COMPLEX) 936 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 937 n = (PetscInt)local_n0*dim[1]*dim[2]; 938 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 939 #else 940 printf("The code comes here\n"); 941 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 942 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 943 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 944 #endif 945 break; 946 default: 947 #if defined(PETSC_USE_COMPLEX) 948 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 949 // printf("The value of alloc local is %ld from process %d\n",alloc_local,rank); 950 // alloc_local = fftw_mpi_local_size(ndim,dim,comm,&local_n0,&local_0_start); 951 n = (PetscInt)local_n0*partial_dim; 952 // printf("New partial dimension is %d %d %d",n,N,ndim); 953 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 954 #else 955 temp = pdim[ndim-1]; 956 pdim[ndim-1]= temp/2 + 1; 957 printf("For Multi dim case temp = %ld, pdim[ndim-1] = %ld\n",temp,pdim[ndim-1]); 958 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 959 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 960 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 961 pdim[ndim-1] = temp; 962 printf("For Multi dim case n = %d, N1 = %d\n",n,N1); 963 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 964 #endif 965 break; 966 } 967 } 968 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 969 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 970 fft->data = (void*)fftw; 971 972 fft->n = n; 973 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 974 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 975 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 976 977 fftw->p_forward = 0; 978 fftw->p_backward = 0; 979 fftw->p_flag = FFTW_ESTIMATE; 980 981 if (size == 1){ 982 A->ops->mult = MatMult_SeqFFTW; 983 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 984 } else { 985 A->ops->mult = MatMult_MPIFFTW; 986 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 987 } 988 fft->matdestroy = MatDestroy_FFTW; 989 A->ops->getvecs = MatGetVecs_FFTW; 990 A->assembled = PETSC_TRUE; 991 #if !defined(PETSC_USE_COMPLEX) 992 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 993 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 994 #endif 995 996 /* get runtime options */ 997 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 998 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 999 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1000 PetscOptionsEnd(); 1001 PetscFunctionReturn(0); 1002 } 1003 EXTERN_C_END 1004 1005 1006 1007 1008