1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42 if (!fftw->p_forward){ /* create a plan, then excute it */ 43 switch (ndim){ 44 case 1: 45 #if defined(PETSC_USE_COMPLEX) 46 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47 #else 48 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 49 #endif 50 break; 51 case 2: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 3: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 default: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 } 73 fftw->finarray = x_array; 74 fftw->foutarray = y_array; 75 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77 fftw_execute(fftw->p_forward); 78 } else { /* use existing plan */ 79 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81 } else { 82 fftw_execute(fftw->p_forward); 83 } 84 } 85 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93 { 94 PetscErrorCode ierr; 95 Mat_FFT *fft = (Mat_FFT*)A->data; 96 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97 PetscScalar *x_array,*y_array; 98 PetscInt ndim=fft->ndim,*dim=fft->dim; 99 100 PetscFunctionBegin; 101 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103 if (!fftw->p_backward){ /* create a plan, then excute it */ 104 switch (ndim){ 105 case 1: 106 #if defined(PETSC_USE_COMPLEX) 107 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 #else 109 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 110 #endif 111 break; 112 case 2: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 3: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 default: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 } 134 fftw->binarray = x_array; 135 fftw->boutarray = y_array; 136 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137 } else { /* use existing plan */ 138 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140 } else { 141 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142 } 143 } 144 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_MPIFFTW" 151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152 { 153 PetscErrorCode ierr; 154 Mat_FFT *fft = (Mat_FFT*)A->data; 155 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156 PetscScalar *x_array,*y_array; 157 PetscInt ndim=fft->ndim,*dim=fft->dim; 158 MPI_Comm comm=((PetscObject)A)->comm; 159 160 PetscFunctionBegin; 161 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_forward){ /* create a plan, then excute it */ 164 switch (ndim){ 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168 #else 169 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 175 #else 176 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 #else 183 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 189 #else 190 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 191 #endif 192 break; 193 } 194 fftw->finarray = x_array; 195 fftw->foutarray = y_array; 196 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198 fftw_execute(fftw->p_forward); 199 } else { /* use existing plan */ 200 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_forward); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214 { 215 PetscErrorCode ierr; 216 Mat_FFT *fft = (Mat_FFT*)A->data; 217 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218 PetscScalar *x_array,*y_array; 219 PetscInt ndim=fft->ndim,*dim=fft->dim; 220 MPI_Comm comm=((PetscObject)A)->comm; 221 222 PetscFunctionBegin; 223 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225 if (!fftw->p_backward){ /* create a plan, then excute it */ 226 switch (ndim){ 227 case 1: 228 #if defined(PETSC_USE_COMPLEX) 229 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230 #else 231 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 232 #endif 233 break; 234 case 2: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #else 238 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 239 #endif 240 break; 241 case 3: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 244 #else 245 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 246 #endif 247 break; 248 default: 249 #if defined(PETSC_USE_COMPLEX) 250 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 251 #else 252 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 } 256 fftw->binarray = x_array; 257 fftw->boutarray = y_array; 258 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259 } else { /* use existing plan */ 260 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262 } else { 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } 265 } 266 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_FFTW" 273 PetscErrorCode MatDestroy_FFTW(Mat A) 274 { 275 Mat_FFT *fft = (Mat_FFT*)A->data; 276 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 fftw_destroy_plan(fftw->p_forward); 281 fftw_destroy_plan(fftw->p_backward); 282 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283 ierr = PetscFree(fft->data);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288 #undef __FUNCT__ 289 #define __FUNCT__ "VecDestroy_MPIFFTW" 290 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291 { 292 PetscErrorCode ierr; 293 PetscScalar *array; 294 295 PetscFunctionBegin; 296 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 304 #undef __FUNCT__ 305 #define __FUNCT__ "MatGetVecsFFTW" 306 /*@ 307 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 308 parallel layout determined by FFTW 309 310 Collective on Mat 311 312 Input Parameter: 313 . mat - the matrix 314 315 Output Parameter: 316 + fin - (optional) input vector of forward FFTW 317 - fout - (optional) output vector of forward FFTW 318 319 Level: advanced 320 321 .seealso: MatCreateFFTW() 322 @*/ 323 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 324 { 325 PetscErrorCode ierr; 326 PetscFunctionBegin; 327 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 EXTERN_C_BEGIN 332 #undef __FUNCT__ 333 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 334 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 335 { 336 PetscErrorCode ierr; 337 PetscMPIInt size,rank; 338 MPI_Comm comm=((PetscObject)A)->comm; 339 Mat_FFT *fft = (Mat_FFT*)A->data; 340 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 341 PetscInt N=fft->N; 342 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 346 PetscValidType(A,1); 347 348 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 349 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 350 if (size == 1){ /* sequential case */ 351 #if defined(PETSC_USE_COMPLEX) 352 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 353 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 354 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 355 #else 356 // if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 357 // if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 358 // if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 359 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 360 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 361 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 362 #endif 363 } else { 364 ptrdiff_t alloc_local,local_n0,local_0_start; 365 ptrdiff_t local_n1; 366 fftw_complex *data_fout; 367 ptrdiff_t local_1_start; 368 #if defined(PETSC_USE_COMPLEX) 369 fftw_complex *data_fin,*data_bout; 370 #else 371 double *data_finr,*data_boutr; 372 PetscInt n1,N1,vsize; 373 ptrdiff_t temp; 374 #endif 375 376 switch (ndim){ 377 case 1: 378 #if !defined(PETSC_USE_COMPLEX) 379 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 380 #else 381 //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 382 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 383 if (fin) { 384 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 385 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 386 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 387 } 388 if (fout) { 389 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 390 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 391 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 392 } 393 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 394 if (bout) { 395 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 396 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 397 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 398 } 399 break; 400 #endif 401 case 2: 402 #if !defined(PETSC_USE_COMPLEX) 403 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 404 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 405 if (fin) { 406 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 407 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 408 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 409 } 410 if (bout) { 411 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 412 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 413 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 414 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 415 } 416 //n1 = 2*local_n1*dim[0]; 417 if (fout) { 418 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 419 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 420 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 421 } 422 #else 423 /* Get local size */ 424 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 425 if (fin) { 426 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 427 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 428 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 429 } 430 if (fout) { 431 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 432 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 433 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 434 } 435 if (bout) { 436 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 437 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 438 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 439 } 440 #endif 441 break; 442 case 3: 443 #if !defined(PETSC_USE_COMPLEX) 444 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 445 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 446 if (fin) { 447 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 448 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 449 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 450 } 451 if (bout) { 452 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 453 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 454 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 455 } 456 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 457 if (fout) { 458 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 459 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 460 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 461 } 462 #else 463 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 464 if (fin) { 465 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 466 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 467 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 468 } 469 if (fout) { 470 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 471 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 472 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 473 } 474 if (bout) { 475 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 476 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 477 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 478 } 479 #endif 480 break; 481 default: 482 #if !defined(PETSC_USE_COMPLEX) 483 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 484 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 485 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 486 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 487 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 488 489 if (fin) { 490 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 491 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 492 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 493 } 494 if (bout) { 495 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 496 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 497 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 //temp = fftw->partial_dim; 500 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 501 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 502 if (fout) { 503 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 504 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 505 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 506 } 507 #else 508 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 509 if (fin) { 510 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 512 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 513 } 514 if (fout) { 515 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 516 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 517 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 518 } 519 if (bout) { 520 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 521 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 522 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 523 } 524 #endif 525 break; 526 } 527 } 528 PetscFunctionReturn(0); 529 } 530 EXTERN_C_END 531 532 #undef __FUNCT__ 533 #define __FUNCT__ "MatGetVecs_FFTW" 534 /* 535 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 536 parallel layout determined by FFTW 537 538 Collective on Mat 539 540 Input Parameter: 541 . mat - the matrix 542 543 Output Parameter: 544 + fin - (optional) input vector of forward FFTW 545 - fout - (optional) output vector of forward FFTW 546 547 Level: advanced 548 549 .seealso: MatCreateFFTW() 550 */ 551 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 552 { 553 PetscErrorCode ierr; 554 PetscMPIInt size,rank; 555 MPI_Comm comm=((PetscObject)A)->comm; 556 Mat_FFT *fft = (Mat_FFT*)A->data; 557 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 558 PetscInt N=fft->N; 559 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 560 561 PetscFunctionBegin; 562 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 563 PetscValidType(A,1); 564 565 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 566 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 567 if (size == 1){ /* sequential case */ 568 #if defined(PETSC_USE_COMPLEX) 569 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 570 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 571 #else 572 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 573 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 574 #endif 575 } else { /* mpi case */ 576 ptrdiff_t alloc_local,local_n0,local_0_start; 577 ptrdiff_t local_n1,local_1_end; 578 fftw_complex *data_fin,*data_fout; 579 #if !defined(PETSC_USE_COMPLEX) 580 double *data_finr ; 581 ptrdiff_t local_1_start,temp; 582 PetscInt vsize,n1,N1; 583 #endif 584 585 // PetscInt ctr; 586 // ptrdiff_t ndim1,*pdim; 587 // ndim1=(ptrdiff_t) ndim; 588 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 589 590 // for(ctr=0;ctr<ndim;ctr++) 591 // {k 592 // pdim[ctr] = dim[ctr]; 593 // } 594 595 596 597 switch (ndim){ 598 case 1: 599 /* Get local size */ 600 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 601 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 602 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 603 printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 604 if (fin) { 605 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 606 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 607 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 if (fout) { 610 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 611 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 612 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 613 } 614 break; 615 case 2: 616 #if !defined(PETSC_USE_COMPLEX) 617 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 618 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 619 if (fin) { 620 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 621 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 622 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 623 //printf("The code comes here with vector size %d\n",vsize); 624 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 625 } 626 n1 = 2*local_n1*(dim[0]); 627 //n1 = 2*local_n1*dim[1]; 628 printf("The values are %ld\n",local_n1); 629 if (fout) { 630 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 631 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 632 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 633 } 634 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 635 636 #else 637 /* Get local size */ 638 //printf("Hope this does not come here"); 639 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 640 if (fin) { 641 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 642 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 643 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 644 } 645 if (fout) { 646 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 647 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 648 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 649 } 650 //printf("Hope this does not come here"); 651 #endif 652 break; 653 case 3: 654 /* Get local size */ 655 #if !defined(PETSC_USE_COMPLEX) 656 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 657 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 658 if (fin) { 659 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 660 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 661 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 662 //printf("The code comes here with vector size %d\n",vsize); 663 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 664 } 665 n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 666 if (fout) { 667 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 668 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 669 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 670 } 671 672 673 #else 674 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 675 // printf("The quantity n is %d",n); 676 if (fin) { 677 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 678 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 679 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 680 } 681 if (fout) { 682 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 683 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 684 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 685 } 686 #endif 687 break; 688 default: 689 /* Get local size */ 690 #if !defined(PETSC_USE_COMPLEX) 691 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 692 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 693 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 694 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 695 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 696 if (fin) { 697 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 698 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 699 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 700 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 701 } 702 temp = fftw->partial_dim; 703 fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 704 705 n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 706 if (fout) { 707 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 708 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 709 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 710 } 711 712 #else 713 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 714 if (fin) { 715 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 716 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 717 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 718 } 719 if (fout) { 720 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 721 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 722 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 723 } 724 #endif 725 break; 726 } 727 } 728 // if (fin){ 729 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 730 // } 731 // if (fout){ 732 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 733 // } 734 PetscFunctionReturn(0); 735 } 736 737 #undef __FUNCT__ 738 #define __FUNCT__ "InputTransformFFT" 739 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 740 { 741 PetscErrorCode ierr; 742 PetscFunctionBegin; 743 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 744 PetscFunctionReturn(0); 745 } 746 747 /* 748 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 749 parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 750 changing dimension. 751 Input A, x, y 752 A - FFTW matrix 753 x - user data 754 Options Database Keys: 755 + -mat_fftw_plannerflags - set FFTW planner flags 756 757 Level: intermediate 758 759 */ 760 761 EXTERN_C_BEGIN 762 #undef __FUNCT__ 763 #define __FUNCT__ "InputTransformFFT_FTTW" 764 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 765 { 766 PetscErrorCode ierr; 767 MPI_Comm comm=((PetscObject)A)->comm; 768 Mat_FFT *fft = (Mat_FFT*)A->data; 769 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 770 PetscInt N=fft->N; 771 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 772 PetscInt low; 773 PetscInt rank,size,vsize,vsize1; 774 ptrdiff_t alloc_local,local_n0,local_0_start; 775 ptrdiff_t local_n1,local_1_start; 776 VecScatter vecscat; 777 IS list1,list2; 778 #if !defined(PETSC_USE_COMPLEX) 779 PetscInt i,j,k,partial_dim; 780 PetscInt *indx1, *indx2, tempindx, tempindx1; 781 PetscInt N1, n1 ,NM; 782 ptrdiff_t temp; 783 #endif 784 785 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 786 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 787 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 788 789 if (size==1) 790 { 791 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 792 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 793 printf("The values of sizes are %d and %d",vsize,vsize1); 794 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 795 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 796 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 797 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 799 ierr = ISDestroy(&list1);CHKERRQ(ierr); 800 } 801 802 else{ 803 804 switch (ndim){ 805 case 1: 806 #if defined(PETSC_USE_COMPLEX) 807 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 808 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 809 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 810 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 811 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 812 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 813 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 814 ierr = ISDestroy(&list1);CHKERRQ(ierr); 815 ierr = ISDestroy(&list2);CHKERRQ(ierr); 816 #else 817 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 818 #endif 819 break; 820 case 2: 821 #if defined(PETSC_USE_COMPLEX) 822 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 823 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 824 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 825 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 826 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 827 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 829 ierr = ISDestroy(&list1);CHKERRQ(ierr); 830 ierr = ISDestroy(&list2);CHKERRQ(ierr); 831 #else 832 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 833 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 834 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 835 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 836 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 837 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 838 839 if (dim[1]%2==0) 840 NM = dim[1]+2; 841 else 842 NM = dim[1]+1; 843 844 for (i=0;i<local_n0;i++){ 845 for (j=0;j<dim[1];j++){ 846 tempindx = i*dim[1] + j; 847 tempindx1 = i*NM + j; 848 indx1[tempindx]=local_0_start*dim[1]+tempindx; 849 indx2[tempindx]=low+tempindx1; 850 } 851 } 852 853 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 854 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 855 856 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 857 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 858 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 859 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 860 ierr = ISDestroy(&list1);CHKERRQ(ierr); 861 ierr = ISDestroy(&list2);CHKERRQ(ierr); 862 ierr = PetscFree(indx1);CHKERRQ(ierr); 863 ierr = PetscFree(indx2);CHKERRQ(ierr); 864 #endif 865 break; 866 867 case 3: 868 #if defined(PETSC_USE_COMPLEX) 869 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 870 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 871 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 872 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 873 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 874 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 875 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 876 ierr = ISDestroy(&list1);CHKERRQ(ierr); 877 ierr = ISDestroy(&list2);CHKERRQ(ierr); 878 #else 879 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 880 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 881 882 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 883 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 884 885 if (dim[2]%2==0) 886 NM = dim[2]+2; 887 else 888 NM = dim[2]+1; 889 890 for (i=0;i<local_n0;i++){ 891 for (j=0;j<dim[1];j++){ 892 for (k=0;k<dim[2];k++){ 893 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 894 tempindx1 = i*dim[1]*NM + j*NM + k; 895 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 896 indx2[tempindx]=low+tempindx1; 897 } 898 } 899 } 900 901 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 902 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 903 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 904 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 905 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 906 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 907 ierr = ISDestroy(&list1);CHKERRQ(ierr); 908 ierr = ISDestroy(&list2);CHKERRQ(ierr); 909 ierr = PetscFree(indx1);CHKERRQ(ierr); 910 ierr = PetscFree(indx2);CHKERRQ(ierr); 911 #endif 912 break; 913 914 default: 915 #if defined(PETSC_USE_COMPLEX) 916 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 917 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 918 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 919 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 920 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 923 ierr = ISDestroy(&list1);CHKERRQ(ierr); 924 ierr = ISDestroy(&list2);CHKERRQ(ierr); 925 #else 926 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 927 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 928 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 929 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 930 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 931 932 partial_dim = fftw->partial_dim; 933 934 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 935 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 936 937 if (dim[ndim-1]%2==0) 938 NM = 2; 939 else 940 NM = 1; 941 942 j = low; 943 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 944 { 945 indx1[i] = local_0_start*partial_dim + i; 946 indx2[i] = j; 947 if (k%dim[ndim-1]==0) 948 { j+=NM;} 949 j++; 950 } 951 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 952 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 953 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 954 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 956 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 957 ierr = ISDestroy(&list1);CHKERRQ(ierr); 958 ierr = ISDestroy(&list2);CHKERRQ(ierr); 959 ierr = PetscFree(indx1);CHKERRQ(ierr); 960 ierr = PetscFree(indx2);CHKERRQ(ierr); 961 #endif 962 break; 963 } 964 } 965 return(0); 966 } 967 EXTERN_C_END 968 969 #undef __FUNCT__ 970 #define __FUNCT__ "OutputTransformFFT" 971 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 972 { 973 PetscErrorCode ierr; 974 PetscFunctionBegin; 975 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 976 PetscFunctionReturn(0); 977 } 978 979 /* 980 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 981 Input A, x, y 982 A - FFTW matrix 983 x - FFTW vector 984 y - PETSc vector that user can read 985 Options Database Keys: 986 + -mat_fftw_plannerflags - set FFTW planner flags 987 988 Level: intermediate 989 990 */ 991 992 EXTERN_C_BEGIN 993 #undef __FUNCT__ 994 #define __FUNCT__ "OutputTransformFFT_FTTW" 995 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 996 { 997 PetscErrorCode ierr; 998 MPI_Comm comm=((PetscObject)A)->comm; 999 Mat_FFT *fft = (Mat_FFT*)A->data; 1000 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1001 PetscInt N=fft->N; 1002 PetscInt ndim=fft->ndim,*dim=fft->dim; 1003 PetscInt low; 1004 PetscInt rank,size; 1005 ptrdiff_t alloc_local,local_n0,local_0_start; 1006 ptrdiff_t local_n1,local_1_start; 1007 VecScatter vecscat; 1008 IS list1,list2; 1009 #if !defined(PETSC_USE_COMPLEX) 1010 PetscInt i,j,k,partial_dim; 1011 PetscInt *indx1, *indx2, tempindx, tempindx1; 1012 PetscInt N1, n1 ,NM; 1013 ptrdiff_t temp; 1014 #endif 1015 1016 1017 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1018 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1019 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 1020 1021 if (size==1){ 1022 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 1023 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1024 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1025 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1027 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1028 } 1029 else{ 1030 1031 switch (ndim){ 1032 case 1: 1033 #if defined(PETSC_USE_COMPLEX) 1034 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1035 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1036 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 1037 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1038 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1039 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1040 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1041 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1042 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1043 #else 1044 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1045 #endif 1046 break; 1047 case 2: 1048 #if defined(PETSC_USE_COMPLEX) 1049 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1050 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1051 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1052 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1053 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1054 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1055 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1056 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1057 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1058 #else 1059 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1060 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1061 1062 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1063 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1064 1065 if (dim[1]%2==0) 1066 NM = dim[1]+2; 1067 else 1068 NM = dim[1]+1; 1069 1070 for (i=0;i<local_n0;i++){ 1071 for (j=0;j<dim[1];j++){ 1072 tempindx = i*dim[1] + j; 1073 tempindx1 = i*NM + j; 1074 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1075 indx2[tempindx]=low+tempindx1; 1076 } 1077 } 1078 1079 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1080 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1081 1082 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1083 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1085 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1086 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1087 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1088 ierr = PetscFree(indx1);CHKERRQ(ierr); 1089 ierr = PetscFree(indx2);CHKERRQ(ierr); 1090 #endif 1091 break; 1092 case 3: 1093 #if defined(PETSC_USE_COMPLEX) 1094 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1095 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1096 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1097 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1098 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1099 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1100 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1101 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1102 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1103 #else 1104 1105 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1106 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1107 1108 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1109 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1110 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1111 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1112 1113 if (dim[2]%2==0) 1114 NM = dim[2]+2; 1115 else 1116 NM = dim[2]+1; 1117 1118 for (i=0;i<local_n0;i++){ 1119 for (j=0;j<dim[1];j++){ 1120 for (k=0;k<dim[2];k++){ 1121 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1122 tempindx1 = i*dim[1]*NM + j*NM + k; 1123 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1124 indx2[tempindx]=low+tempindx1; 1125 } 1126 } 1127 } 1128 1129 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1130 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1131 1132 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1133 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1134 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1135 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1136 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1137 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1138 ierr = PetscFree(indx1);CHKERRQ(ierr); 1139 ierr = PetscFree(indx2);CHKERRQ(ierr); 1140 #endif 1141 break; 1142 default: 1143 #if defined(PETSC_USE_COMPLEX) 1144 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1145 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1146 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1147 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1148 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1149 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1150 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1151 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1152 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1153 #else 1154 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1155 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1156 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1157 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1158 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1159 1160 partial_dim = fftw->partial_dim; 1161 1162 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1163 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1164 1165 if (dim[ndim-1]%2==0) 1166 NM = 2; 1167 else 1168 NM = 1; 1169 1170 j = low; 1171 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1172 { 1173 indx1[i] = local_0_start*partial_dim + i; 1174 indx2[i] = j; 1175 if (k%dim[ndim-1]==0) 1176 { j+=NM;} 1177 j++; 1178 } 1179 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1180 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1181 1182 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1183 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1184 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1185 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1186 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1187 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1188 ierr = PetscFree(indx1);CHKERRQ(ierr); 1189 ierr = PetscFree(indx2);CHKERRQ(ierr); 1190 #endif 1191 break; 1192 } 1193 } 1194 return 0; 1195 } 1196 EXTERN_C_END 1197 1198 EXTERN_C_BEGIN 1199 #undef __FUNCT__ 1200 #define __FUNCT__ "MatCreate_FFTW" 1201 /* 1202 MatCreate_FFTW - Creates a matrix object that provides FFT 1203 via the external package FFTW 1204 Options Database Keys: 1205 + -mat_fftw_plannerflags - set FFTW planner flags 1206 1207 Level: intermediate 1208 1209 */ 1210 1211 PetscErrorCode MatCreate_FFTW(Mat A) 1212 { 1213 PetscErrorCode ierr; 1214 MPI_Comm comm=((PetscObject)A)->comm; 1215 Mat_FFT *fft=(Mat_FFT*)A->data; 1216 Mat_FFTW *fftw; 1217 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1218 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1219 PetscBool flg; 1220 PetscInt p_flag,partial_dim=1,ctr; 1221 PetscMPIInt size,rank; 1222 ptrdiff_t *pdim; 1223 ptrdiff_t local_n1,local_1_start; 1224 #if !defined(PETSC_USE_COMPLEX) 1225 ptrdiff_t temp; 1226 PetscInt N1,tot_dim; 1227 #else 1228 PetscInt n1; 1229 #endif 1230 1231 1232 PetscFunctionBegin; 1233 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1234 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1235 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1236 1237 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1238 pdim[0] = dim[0]; 1239 #if !defined(PETSC_USE_COMPLEX) 1240 tot_dim = 2*dim[0]; 1241 #endif 1242 for (ctr=1;ctr<ndim;ctr++) 1243 { 1244 partial_dim *= dim[ctr]; 1245 pdim[ctr] = dim[ctr]; 1246 #if !defined(PETSC_USE_COMPLEX) 1247 if (ctr==ndim-1) 1248 tot_dim *= (dim[ctr]/2+1); 1249 else 1250 tot_dim *= dim[ctr]; 1251 #endif 1252 } 1253 1254 if (size == 1) { 1255 #if defined(PETSC_USE_COMPLEX) 1256 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1257 n = N; 1258 #else 1259 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1260 n = tot_dim; 1261 #endif 1262 1263 } else { 1264 ptrdiff_t alloc_local,local_n0,local_0_start; 1265 switch (ndim){ 1266 case 1: 1267 #if !defined(PETSC_USE_COMPLEX) 1268 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1269 #else 1270 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1271 n = (PetscInt)local_n0; 1272 n1 = (PetscInt) local_n1; 1273 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1274 #endif 1275 break; 1276 case 2: 1277 #if defined(PETSC_USE_COMPLEX) 1278 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1279 /* 1280 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1281 PetscSynchronizedFlush(comm); 1282 */ 1283 n = (PetscInt)local_n0*dim[1]; 1284 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1285 #else 1286 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1287 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1288 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1289 #endif 1290 break; 1291 case 3: 1292 #if defined(PETSC_USE_COMPLEX) 1293 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1294 n = (PetscInt)local_n0*dim[1]*dim[2]; 1295 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1296 #else 1297 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1298 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1299 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1300 #endif 1301 break; 1302 default: 1303 #if defined(PETSC_USE_COMPLEX) 1304 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1305 n = (PetscInt)local_n0*partial_dim; 1306 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1307 #else 1308 temp = pdim[ndim-1]; 1309 pdim[ndim-1] = temp/2 + 1; 1310 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1311 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1312 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1313 pdim[ndim-1] = temp; 1314 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1315 #endif 1316 break; 1317 } 1318 } 1319 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1320 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1321 fft->data = (void*)fftw; 1322 1323 fft->n = n; 1324 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1325 fftw->partial_dim = partial_dim; 1326 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1327 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1328 1329 fftw->p_forward = 0; 1330 fftw->p_backward = 0; 1331 fftw->p_flag = FFTW_ESTIMATE; 1332 1333 if (size == 1){ 1334 A->ops->mult = MatMult_SeqFFTW; 1335 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1336 } else { 1337 A->ops->mult = MatMult_MPIFFTW; 1338 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1339 } 1340 fft->matdestroy = MatDestroy_FFTW; 1341 //A->ops->getvecs = MatGetVecs_FFTW; 1342 A->assembled = PETSC_TRUE; 1343 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1344 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1345 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1346 1347 /* get runtime options */ 1348 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1349 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1350 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1351 PetscOptionsEnd(); 1352 PetscFunctionReturn(0); 1353 } 1354 EXTERN_C_END 1355 1356 1357 1358 1359