1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 30 Input parameter: 31 mat - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 37 */ 38 #undef __FUNCT__ 39 #define __FUNCT__ "MatMult_SeqFFTW" 40 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 41 { 42 PetscErrorCode ierr; 43 Mat_FFT *fft = (Mat_FFT*)A->data; 44 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 45 PetscScalar *x_array,*y_array; 46 PetscInt ndim=fft->ndim,*dim=fft->dim; 47 48 PetscFunctionBegin; 49 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51 if (!fftw->p_forward){ /* create a plan, then excute it */ 52 switch (ndim){ 53 case 1: 54 #if defined(PETSC_USE_COMPLEX) 55 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 #else 57 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 58 #endif 59 break; 60 case 2: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 65 #endif 66 break; 67 case 3: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 72 #endif 73 break; 74 default: 75 #if defined(PETSC_USE_COMPLEX) 76 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 77 #else 78 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 79 #endif 80 break; 81 } 82 fftw->finarray = x_array; 83 fftw->foutarray = y_array; 84 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 85 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 86 fftw_execute(fftw->p_forward); 87 } else { /* use existing plan */ 88 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 89 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 90 } else { 91 fftw_execute(fftw->p_forward); 92 } 93 } 94 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 95 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 96 PetscFunctionReturn(0); 97 } 98 99 /* MatMultTranspose_SeqFFTW performs serial backward DFT 100 101 Input parameter: 102 mat - the matrix 103 x - the vector on which BDFT will be performed 104 105 Output parameter: 106 y - vector that stores result of BDFT 107 108 */ 109 110 #undef __FUNCT__ 111 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 112 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 113 { 114 PetscErrorCode ierr; 115 Mat_FFT *fft = (Mat_FFT*)A->data; 116 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 117 PetscScalar *x_array,*y_array; 118 PetscInt ndim=fft->ndim,*dim=fft->dim; 119 120 PetscFunctionBegin; 121 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 122 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 123 if (!fftw->p_backward){ /* create a plan, then excute it */ 124 switch (ndim){ 125 case 1: 126 #if defined(PETSC_USE_COMPLEX) 127 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 128 #else 129 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 130 #endif 131 break; 132 case 2: 133 #if defined(PETSC_USE_COMPLEX) 134 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 135 #else 136 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 137 #endif 138 break; 139 case 3: 140 #if defined(PETSC_USE_COMPLEX) 141 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 142 #else 143 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 144 #endif 145 break; 146 default: 147 #if defined(PETSC_USE_COMPLEX) 148 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 149 #else 150 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 151 #endif 152 break; 153 } 154 fftw->binarray = x_array; 155 fftw->boutarray = y_array; 156 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 157 } else { /* use existing plan */ 158 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 159 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 160 } else { 161 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 162 } 163 } 164 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 165 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 166 PetscFunctionReturn(0); 167 } 168 169 /* MatMult_MPIFFTW performs forward DFT in parallel 170 171 Input parameter: 172 mat - the matrix 173 x - the vector on which FDFT will be performed 174 175 Output parameter: 176 y - vector that stores result of FDFT 177 178 */ 179 180 #undef __FUNCT__ 181 #define __FUNCT__ "MatMult_MPIFFTW" 182 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 183 { 184 PetscErrorCode ierr; 185 Mat_FFT *fft = (Mat_FFT*)A->data; 186 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 187 PetscScalar *x_array,*y_array; 188 PetscInt ndim=fft->ndim,*dim=fft->dim; 189 MPI_Comm comm=((PetscObject)A)->comm; 190 191 PetscFunctionBegin; 192 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 193 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 194 if (!fftw->p_forward){ /* create a plan, then excute it */ 195 switch (ndim){ 196 case 1: 197 #if defined(PETSC_USE_COMPLEX) 198 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 201 #endif 202 break; 203 case 2: 204 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 205 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 case 3: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 default: 218 #if defined(PETSC_USE_COMPLEX) 219 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 220 #else 221 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 222 #endif 223 break; 224 } 225 fftw->finarray = x_array; 226 fftw->foutarray = y_array; 227 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 228 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 229 fftw_execute(fftw->p_forward); 230 } else { /* use existing plan */ 231 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 232 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 233 } else { 234 fftw_execute(fftw->p_forward); 235 } 236 } 237 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 238 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 239 PetscFunctionReturn(0); 240 } 241 242 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 243 244 Input parameter: 245 mat - the matrix 246 x - the vector on which BDFT will be performed 247 248 Output parameter: 249 y - vector that stores result of BDFT 250 251 */ 252 #undef __FUNCT__ 253 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 254 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 255 { 256 PetscErrorCode ierr; 257 Mat_FFT *fft = (Mat_FFT*)A->data; 258 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 259 PetscScalar *x_array,*y_array; 260 PetscInt ndim=fft->ndim,*dim=fft->dim; 261 MPI_Comm comm=((PetscObject)A)->comm; 262 263 PetscFunctionBegin; 264 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 265 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 266 if (!fftw->p_backward){ /* create a plan, then excute it */ 267 switch (ndim){ 268 case 1: 269 #if defined(PETSC_USE_COMPLEX) 270 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 271 #else 272 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 273 #endif 274 break; 275 case 2: 276 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 277 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 278 #else 279 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 280 #endif 281 break; 282 case 3: 283 #if defined(PETSC_USE_COMPLEX) 284 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 285 #else 286 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 287 #endif 288 break; 289 default: 290 #if defined(PETSC_USE_COMPLEX) 291 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 292 #else 293 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 294 #endif 295 break; 296 } 297 fftw->binarray = x_array; 298 fftw->boutarray = y_array; 299 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 300 } else { /* use existing plan */ 301 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 302 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 303 } else { 304 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305 } 306 } 307 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 308 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 309 PetscFunctionReturn(0); 310 } 311 312 #undef __FUNCT__ 313 #define __FUNCT__ "MatDestroy_FFTW" 314 PetscErrorCode MatDestroy_FFTW(Mat A) 315 { 316 Mat_FFT *fft = (Mat_FFT*)A->data; 317 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 318 PetscErrorCode ierr; 319 320 PetscFunctionBegin; 321 fftw_destroy_plan(fftw->p_forward); 322 fftw_destroy_plan(fftw->p_backward); 323 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 324 ierr = PetscFree(fft->data);CHKERRQ(ierr); 325 PetscFunctionReturn(0); 326 } 327 328 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 329 #undef __FUNCT__ 330 #define __FUNCT__ "VecDestroy_MPIFFTW" 331 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 332 { 333 PetscErrorCode ierr; 334 PetscScalar *array; 335 336 PetscFunctionBegin; 337 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 338 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 339 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 340 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatGetVecsFFTW" 347 /*@ 348 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 349 parallel layout determined by FFTW 350 351 Collective on Mat 352 353 Input Parameter: 354 . mat - the matrix 355 356 Output Parameter: 357 + fin - (optional) input vector of forward FFTW 358 - fout - (optional) output vector of forward FFTW 359 - bout - (optional) output vector of backward FFTW 360 361 Level: advanced 362 Note: The parallel layout of output of forward FFTW is always same as the input 363 of the backward FFTW. But parallel layout of the input vector of forward 364 FFTW might not be same as the output of backward FFTW. 365 Also note that we need to provide enough space while doing parallel real transform. 366 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 367 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 368 last dimension from n to n/2+1 while invoking this routine. 369 370 .seealso: MatCreateFFTW() 371 @*/ 372 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 373 { 374 PetscErrorCode ierr; 375 376 PetscFunctionBegin; 377 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 378 PetscFunctionReturn(0); 379 } 380 381 EXTERN_C_BEGIN 382 #undef __FUNCT__ 383 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 384 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 385 { 386 PetscErrorCode ierr; 387 PetscMPIInt size,rank; 388 MPI_Comm comm=((PetscObject)A)->comm; 389 Mat_FFT *fft = (Mat_FFT*)A->data; 390 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 391 PetscInt N=fft->N; 392 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 393 394 PetscFunctionBegin; 395 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 396 PetscValidType(A,1); 397 398 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 399 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 400 if (size == 1){ /* sequential case */ 401 #if defined(PETSC_USE_COMPLEX) 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 403 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 404 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 405 #else 406 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 407 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 408 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 409 #endif 410 } else { /* parallel cases */ 411 ptrdiff_t alloc_local,local_n0,local_0_start; 412 ptrdiff_t local_n1; 413 fftw_complex *data_fout; 414 ptrdiff_t local_1_start; 415 #if defined(PETSC_USE_COMPLEX) 416 fftw_complex *data_fin,*data_bout; 417 #else 418 double *data_finr,*data_boutr; 419 PetscInt n1,N1,vsize; 420 ptrdiff_t temp; 421 #endif 422 423 switch (ndim){ 424 case 1: 425 #if !defined(PETSC_USE_COMPLEX) 426 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 427 #else 428 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 429 if (fin) { 430 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 431 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 432 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 433 } 434 if (fout) { 435 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 436 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 437 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 438 } 439 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 440 if (bout) { 441 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 442 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 443 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 444 } 445 break; 446 #endif 447 case 2: 448 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 449 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 450 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 451 if (fin) { 452 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 454 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 if (bout) { 457 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 458 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 459 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 460 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 //n1 = 2*local_n1*dim[0]; 463 if (fout) { 464 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 465 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 466 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 467 } 468 #else 469 /* Get local size */ 470 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 471 if (fin) { 472 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 474 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (fout) { 477 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 479 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 if (bout) { 482 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 484 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 #endif 487 break; 488 case 3: 489 #if !defined(PETSC_USE_COMPLEX) 490 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 491 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 492 if (fin) { 493 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 495 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 if (bout) { 498 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 499 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 500 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 501 } 502 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 503 if (fout) { 504 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 505 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 506 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 507 } 508 #else 509 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 510 if (fin) { 511 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (fout) { 516 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 518 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 if (bout) { 521 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 522 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 523 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 524 } 525 #endif 526 break; 527 default: 528 #if !defined(PETSC_USE_COMPLEX) 529 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 530 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 531 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 532 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 533 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 534 535 if (fin) { 536 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 538 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 if (bout) { 541 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 542 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 543 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 544 } 545 //temp = fftw->partial_dim; 546 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 547 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 548 if (fout) { 549 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 551 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 #else 554 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 555 if (fin) { 556 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 557 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 558 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 559 } 560 if (fout) { 561 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 562 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 563 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 564 } 565 if (bout) { 566 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 567 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 568 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 569 } 570 #endif 571 break; 572 } 573 } 574 PetscFunctionReturn(0); 575 } 576 EXTERN_C_END 577 578 #undef __FUNCT__ 579 #define __FUNCT__ "VecScatterPetscToFFTW" 580 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 581 { 582 PetscErrorCode ierr; 583 PetscFunctionBegin; 584 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 585 PetscFunctionReturn(0); 586 } 587 588 /* 589 VecScatterPetscToFFTW_FFTW - Copies the user data to the vector that goes into FFTW block. 590 Input A, x, y 591 A - FFTW matrix 592 x - user data 593 Options Database Keys: 594 + -mat_fftw_plannerflags - set FFTW planner flags 595 596 Level: intermediate 597 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 598 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 599 zeros. This routine does that job by scattering operation. 600 601 602 */ 603 604 EXTERN_C_BEGIN 605 #undef __FUNCT__ 606 #define __FUNCT__ "VecScatterPetscToFFTW_FTTW" 607 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 608 { 609 PetscErrorCode ierr; 610 MPI_Comm comm=((PetscObject)A)->comm; 611 Mat_FFT *fft = (Mat_FFT*)A->data; 612 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 613 PetscInt N=fft->N; 614 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 615 PetscInt low; 616 PetscInt rank,size,vsize,vsize1; 617 ptrdiff_t alloc_local,local_n0,local_0_start; 618 ptrdiff_t local_n1,local_1_start; 619 VecScatter vecscat; 620 IS list1,list2; 621 #if !defined(PETSC_USE_COMPLEX) 622 PetscInt i,j,k,partial_dim; 623 PetscInt *indx1, *indx2, tempindx, tempindx1; 624 PetscInt N1, n1 ,NM; 625 ptrdiff_t temp; 626 #endif 627 628 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 629 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 630 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 631 632 if (size==1) 633 { 634 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 635 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 636 //printf("The values of sizes are %d and %d",vsize,vsize1); 637 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 638 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 639 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 640 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 641 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 642 ierr = ISDestroy(&list1);CHKERRQ(ierr); 643 } 644 645 else{ 646 647 switch (ndim){ 648 case 1: 649 #if defined(PETSC_USE_COMPLEX) 650 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 651 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 652 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 653 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 654 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 655 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 656 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 657 ierr = ISDestroy(&list1);CHKERRQ(ierr); 658 ierr = ISDestroy(&list2);CHKERRQ(ierr); 659 #else 660 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 661 #endif 662 break; 663 case 2: 664 #if defined(PETSC_USE_COMPLEX) 665 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 666 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 667 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 668 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 669 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 670 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 671 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 672 ierr = ISDestroy(&list1);CHKERRQ(ierr); 673 ierr = ISDestroy(&list2);CHKERRQ(ierr); 674 #else 675 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 676 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 677 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 678 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 679 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 680 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 681 682 if (dim[1]%2==0) 683 NM = dim[1]+2; 684 else 685 NM = dim[1]+1; 686 687 for (i=0;i<local_n0;i++){ 688 for (j=0;j<dim[1];j++){ 689 tempindx = i*dim[1] + j; 690 tempindx1 = i*NM + j; 691 indx1[tempindx]=local_0_start*dim[1]+tempindx; 692 indx2[tempindx]=low+tempindx1; 693 } 694 } 695 696 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 697 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 698 699 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 700 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 701 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 702 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 703 ierr = ISDestroy(&list1);CHKERRQ(ierr); 704 ierr = ISDestroy(&list2);CHKERRQ(ierr); 705 ierr = PetscFree(indx1);CHKERRQ(ierr); 706 ierr = PetscFree(indx2);CHKERRQ(ierr); 707 #endif 708 break; 709 710 case 3: 711 #if defined(PETSC_USE_COMPLEX) 712 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 713 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 714 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 715 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 716 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 717 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 718 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 719 ierr = ISDestroy(&list1);CHKERRQ(ierr); 720 ierr = ISDestroy(&list2);CHKERRQ(ierr); 721 #else 722 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 723 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 724 725 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 726 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 727 728 if (dim[2]%2==0) 729 NM = dim[2]+2; 730 else 731 NM = dim[2]+1; 732 733 for (i=0;i<local_n0;i++){ 734 for (j=0;j<dim[1];j++){ 735 for (k=0;k<dim[2];k++){ 736 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 737 tempindx1 = i*dim[1]*NM + j*NM + k; 738 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 739 indx2[tempindx]=low+tempindx1; 740 } 741 } 742 } 743 744 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 745 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 746 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 747 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 748 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 749 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 750 ierr = ISDestroy(&list1);CHKERRQ(ierr); 751 ierr = ISDestroy(&list2);CHKERRQ(ierr); 752 ierr = PetscFree(indx1);CHKERRQ(ierr); 753 ierr = PetscFree(indx2);CHKERRQ(ierr); 754 #endif 755 break; 756 757 default: 758 #if defined(PETSC_USE_COMPLEX) 759 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 760 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 761 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 762 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 763 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 764 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 765 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 766 ierr = ISDestroy(&list1);CHKERRQ(ierr); 767 ierr = ISDestroy(&list2);CHKERRQ(ierr); 768 #else 769 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 770 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 771 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 772 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 773 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 774 775 partial_dim = fftw->partial_dim; 776 777 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 778 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 779 780 if (dim[ndim-1]%2==0) 781 NM = 2; 782 else 783 NM = 1; 784 785 j = low; 786 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 787 { 788 indx1[i] = local_0_start*partial_dim + i; 789 indx2[i] = j; 790 if (k%dim[ndim-1]==0) 791 { j+=NM;} 792 j++; 793 } 794 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 795 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 796 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 797 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 800 ierr = ISDestroy(&list1);CHKERRQ(ierr); 801 ierr = ISDestroy(&list2);CHKERRQ(ierr); 802 ierr = PetscFree(indx1);CHKERRQ(ierr); 803 ierr = PetscFree(indx2);CHKERRQ(ierr); 804 #endif 805 break; 806 } 807 } 808 return(0); 809 } 810 EXTERN_C_END 811 812 #undef __FUNCT__ 813 #define __FUNCT__ "VecScatterFFTWToPetsc" 814 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 815 { 816 PetscErrorCode ierr; 817 PetscFunctionBegin; 818 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 819 PetscFunctionReturn(0); 820 } 821 822 /* 823 VecScatterFFTWToPetsc_FFTW - Converts FFTW output to the PETSc vector that user can use. 824 Input A, x, y 825 A - FFTW matrix 826 x - FFTW vector 827 y - PETSc vector that user can read 828 829 Level: intermediate 830 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 831 VecScatterFFTWToPetsc removes those extra zeros. 832 833 */ 834 835 EXTERN_C_BEGIN 836 #undef __FUNCT__ 837 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 838 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 839 { 840 PetscErrorCode ierr; 841 MPI_Comm comm=((PetscObject)A)->comm; 842 Mat_FFT *fft = (Mat_FFT*)A->data; 843 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 844 PetscInt N=fft->N; 845 PetscInt ndim=fft->ndim,*dim=fft->dim; 846 PetscInt low; 847 PetscInt rank,size; 848 ptrdiff_t alloc_local,local_n0,local_0_start; 849 ptrdiff_t local_n1,local_1_start; 850 VecScatter vecscat; 851 IS list1,list2; 852 #if !defined(PETSC_USE_COMPLEX) 853 PetscInt i,j,k,partial_dim; 854 PetscInt *indx1, *indx2, tempindx, tempindx1; 855 PetscInt N1, n1 ,NM; 856 ptrdiff_t temp; 857 #endif 858 859 860 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 861 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 862 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 863 864 if (size==1){ 865 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 866 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 867 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 869 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 870 ierr = ISDestroy(&list1);CHKERRQ(ierr); 871 } 872 else{ 873 874 switch (ndim){ 875 case 1: 876 #if defined(PETSC_USE_COMPLEX) 877 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 878 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 879 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 880 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 881 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 882 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 883 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 884 ierr = ISDestroy(&list1);CHKERRQ(ierr); 885 ierr = ISDestroy(&list2);CHKERRQ(ierr); 886 #else 887 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 888 #endif 889 break; 890 case 2: 891 #if defined(PETSC_USE_COMPLEX) 892 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 893 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 894 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 895 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 896 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 897 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 898 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 899 ierr = ISDestroy(&list1);CHKERRQ(ierr); 900 ierr = ISDestroy(&list2);CHKERRQ(ierr); 901 #else 902 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 903 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 904 905 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 906 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 907 908 if (dim[1]%2==0) 909 NM = dim[1]+2; 910 else 911 NM = dim[1]+1; 912 913 for (i=0;i<local_n0;i++){ 914 for (j=0;j<dim[1];j++){ 915 tempindx = i*dim[1] + j; 916 tempindx1 = i*NM + j; 917 indx1[tempindx]=local_0_start*dim[1]+tempindx; 918 indx2[tempindx]=low+tempindx1; 919 } 920 } 921 922 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 923 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 924 925 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 926 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 929 ierr = ISDestroy(&list1);CHKERRQ(ierr); 930 ierr = ISDestroy(&list2);CHKERRQ(ierr); 931 ierr = PetscFree(indx1);CHKERRQ(ierr); 932 ierr = PetscFree(indx2);CHKERRQ(ierr); 933 #endif 934 break; 935 case 3: 936 #if defined(PETSC_USE_COMPLEX) 937 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 938 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 939 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 940 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 941 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 942 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 943 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 944 ierr = ISDestroy(&list1);CHKERRQ(ierr); 945 ierr = ISDestroy(&list2);CHKERRQ(ierr); 946 #else 947 948 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 949 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 950 951 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 952 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 953 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 954 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 955 956 if (dim[2]%2==0) 957 NM = dim[2]+2; 958 else 959 NM = dim[2]+1; 960 961 for (i=0;i<local_n0;i++){ 962 for (j=0;j<dim[1];j++){ 963 for (k=0;k<dim[2];k++){ 964 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 965 tempindx1 = i*dim[1]*NM + j*NM + k; 966 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 967 indx2[tempindx]=low+tempindx1; 968 } 969 } 970 } 971 972 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 973 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 974 975 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 976 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 979 ierr = ISDestroy(&list1);CHKERRQ(ierr); 980 ierr = ISDestroy(&list2);CHKERRQ(ierr); 981 ierr = PetscFree(indx1);CHKERRQ(ierr); 982 ierr = PetscFree(indx2);CHKERRQ(ierr); 983 #endif 984 break; 985 default: 986 #if defined(PETSC_USE_COMPLEX) 987 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 988 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 989 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 990 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 991 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 992 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 994 ierr = ISDestroy(&list1);CHKERRQ(ierr); 995 ierr = ISDestroy(&list2);CHKERRQ(ierr); 996 #else 997 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 998 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 999 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1000 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1001 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1002 1003 partial_dim = fftw->partial_dim; 1004 1005 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1006 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1007 1008 if (dim[ndim-1]%2==0) 1009 NM = 2; 1010 else 1011 NM = 1; 1012 1013 j = low; 1014 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1015 { 1016 indx1[i] = local_0_start*partial_dim + i; 1017 indx2[i] = j; 1018 if (k%dim[ndim-1]==0) 1019 { j+=NM;} 1020 j++; 1021 } 1022 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1023 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1024 1025 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1026 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1028 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1030 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1031 ierr = PetscFree(indx1);CHKERRQ(ierr); 1032 ierr = PetscFree(indx2);CHKERRQ(ierr); 1033 #endif 1034 break; 1035 } 1036 } 1037 return 0; 1038 } 1039 EXTERN_C_END 1040 1041 EXTERN_C_BEGIN 1042 #undef __FUNCT__ 1043 #define __FUNCT__ "MatCreate_FFTW" 1044 /* 1045 MatCreate_FFTW - Creates a matrix object that provides FFT 1046 via the external package FFTW 1047 Options Database Keys: 1048 + -mat_fftw_plannerflags - set FFTW planner flags 1049 1050 Level: intermediate 1051 1052 */ 1053 1054 PetscErrorCode MatCreate_FFTW(Mat A) 1055 { 1056 PetscErrorCode ierr; 1057 MPI_Comm comm=((PetscObject)A)->comm; 1058 Mat_FFT *fft=(Mat_FFT*)A->data; 1059 Mat_FFTW *fftw; 1060 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1061 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1062 PetscBool flg; 1063 PetscInt p_flag,partial_dim=1,ctr; 1064 PetscMPIInt size,rank; 1065 ptrdiff_t *pdim; 1066 ptrdiff_t local_n1,local_1_start; 1067 #if !defined(PETSC_USE_COMPLEX) 1068 ptrdiff_t temp; 1069 PetscInt N1,tot_dim; 1070 #else 1071 PetscInt n1; 1072 #endif 1073 1074 PetscFunctionBegin; 1075 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1076 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1077 1078 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1079 pdim[0] = dim[0]; 1080 #if !defined(PETSC_USE_COMPLEX) 1081 tot_dim = 2*dim[0]; 1082 #endif 1083 for (ctr=1;ctr<ndim;ctr++) 1084 { 1085 partial_dim *= dim[ctr]; 1086 pdim[ctr] = dim[ctr]; 1087 #if !defined(PETSC_USE_COMPLEX) 1088 if (ctr==ndim-1) 1089 tot_dim *= (dim[ctr]/2+1); 1090 else 1091 tot_dim *= dim[ctr]; 1092 #endif 1093 } 1094 1095 if (size == 1) { 1096 #if defined(PETSC_USE_COMPLEX) 1097 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1098 n = N; 1099 #else 1100 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1101 n = tot_dim; 1102 #endif 1103 1104 } else { 1105 ptrdiff_t alloc_local,local_n0,local_0_start; 1106 switch (ndim){ 1107 case 1: 1108 #if !defined(PETSC_USE_COMPLEX) 1109 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1110 #else 1111 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1112 n = (PetscInt)local_n0; 1113 n1 = (PetscInt) local_n1; 1114 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1115 #endif 1116 break; 1117 case 2: 1118 #if defined(PETSC_USE_COMPLEX) 1119 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1120 /* 1121 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1122 PetscSynchronizedFlush(comm); 1123 */ 1124 n = (PetscInt)local_n0*dim[1]; 1125 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1126 #else 1127 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1128 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1129 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1130 #endif 1131 break; 1132 case 3: 1133 #if defined(PETSC_USE_COMPLEX) 1134 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1135 n = (PetscInt)local_n0*dim[1]*dim[2]; 1136 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1137 #else 1138 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1139 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1140 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1141 #endif 1142 break; 1143 default: 1144 #if defined(PETSC_USE_COMPLEX) 1145 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1146 n = (PetscInt)local_n0*partial_dim; 1147 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1148 #else 1149 temp = pdim[ndim-1]; 1150 pdim[ndim-1] = temp/2 + 1; 1151 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1152 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1153 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1154 pdim[ndim-1] = temp; 1155 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1156 #endif 1157 break; 1158 } 1159 } 1160 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1161 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1162 fft->data = (void*)fftw; 1163 1164 fft->n = n; 1165 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1166 fftw->partial_dim = partial_dim; 1167 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1168 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1169 1170 fftw->p_forward = 0; 1171 fftw->p_backward = 0; 1172 fftw->p_flag = FFTW_ESTIMATE; 1173 1174 if (size == 1){ 1175 A->ops->mult = MatMult_SeqFFTW; 1176 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1177 } else { 1178 A->ops->mult = MatMult_MPIFFTW; 1179 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1180 } 1181 fft->matdestroy = MatDestroy_FFTW; 1182 //A->ops->getvecs = MatGetVecs_FFTW; 1183 A->assembled = PETSC_TRUE; 1184 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1185 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 1186 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1187 1188 /* get runtime options */ 1189 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1190 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1191 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1192 PetscOptionsEnd(); 1193 PetscFunctionReturn(0); 1194 } 1195 EXTERN_C_END 1196 1197 1198 1199 1200