xref: /petsc/src/sys/fileio/fretrieve.c (revision 750b007cd8d816cecd9de99077bb0a703b4cf61a)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7 #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12 #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15 #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20 #include <sys/systeminfo.h>
21 #endif
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users.
27 
28    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
29 
30 */
31 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state) {
32   PetscFunctionBegin;
33   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
34   PetscCallMPI(PetscFree(count_val));
35   PetscFunctionReturn(MPI_SUCCESS);
36 }
37 
38 /*@C
39    PetscGetTmp - Gets the name of the tmp directory
40 
41    Collective
42 
43    Input Parameters:
44 +  comm - MPI_Communicator that may share /tmp
45 -  len - length of string to hold name
46 
47    Output Parameter:
48 .  dir - directory name
49 
50    Options Database Keys:
51 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
52 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
53 -    -tmp tmpdir - name of the directory you wish to use as /tmp
54 
55    Environmental Variables:
56 +     `PETSC_SHARED_TMP` - indicates the directory is shared among the MPI ranks
57 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is not shared among the MPI ranks
58 -     `PETSC_TMP` - name of the directory you wish to use as /tmp
59 
60    Level: developer
61 
62 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
63 @*/
64 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len) {
65   PetscBool flg;
66 
67   PetscFunctionBegin;
68   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
69   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
70   PetscFunctionReturn(0);
71 }
72 
73 /*@C
74    PetscSharedTmp - Determines if all processors in a communicator share a
75          /tmp or have different ones.
76 
77    Collective
78 
79    Input Parameter:
80 .  comm - MPI_Communicator that may share /tmp
81 
82    Output Parameter:
83 .  shared - `PETSC_TRUE` or `PETSC_FALSE`
84 
85    Options Database Keys:
86 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
87 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
88 -    -tmp tmpdir - name of the directory you wish to use as /tmp
89 
90    Environmental Variables:
91 +     `PETSC_SHARED_TMP`  - indicates the directory is shared among the MPI ranks
92 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is not shared among the MPI ranks
93 -     `PETSC_TMP` - name of the directory you wish to use as /tmp
94 
95    Level: developer
96 
97    Notes:
98    Stores the status as a MPI attribute so it does not have
99     to be redetermined each time.
100 
101       Assumes that all processors in a communicator either
102        1) have a common /tmp or
103        2) each has a separate /tmp
104       eventually we can write a fancier one that determines which processors
105       share a common /tmp.
106 
107    This will be very slow on runs with a large number of processors since
108    it requires O(p*p) file opens.
109 
110    If the environmental variable PETSC_TMP is set it will use this directory
111   as the "/tmp" directory.
112 
113 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
114 @*/
115 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared) {
116   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
117   PetscBool          flg, iflg;
118   FILE              *fd;
119   static PetscMPIInt Petsc_Tmp_keyval = MPI_KEYVAL_INVALID;
120   int                err;
121 
122   PetscFunctionBegin;
123   PetscCallMPI(MPI_Comm_size(comm, &size));
124   if (size == 1) {
125     *shared = PETSC_TRUE;
126     PetscFunctionReturn(0);
127   }
128 
129   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
130   if (flg) {
131     *shared = PETSC_TRUE;
132     PetscFunctionReturn(0);
133   }
134 
135   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
136   if (flg) {
137     *shared = PETSC_FALSE;
138     PetscFunctionReturn(0);
139   }
140 
141   if (Petsc_Tmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_Tmp_keyval, NULL));
142 
143   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Tmp_keyval, (void **)&tagvalp, (int *)&iflg));
144   if (!iflg) {
145     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
146 
147     /* This communicator does not yet have a shared tmp attribute */
148     PetscCall(PetscMalloc1(1, &tagvalp));
149     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Tmp_keyval, tagvalp));
150 
151     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
152     if (!iflg) {
153       PetscCall(PetscStrcpy(filename, "/tmp"));
154     } else {
155       PetscCall(PetscStrcpy(filename, tmpname));
156     }
157 
158     PetscCall(PetscStrcat(filename, "/petsctestshared"));
159     PetscCallMPI(MPI_Comm_rank(comm, &rank));
160 
161     /* each processor creates a /tmp file and all the later ones check */
162     /* this makes sure no subset of processors is shared */
163     *shared = PETSC_FALSE;
164     for (i = 0; i < size - 1; i++) {
165       if (rank == i) {
166         fd = fopen(filename, "w");
167         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
168         err = fclose(fd);
169         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
170       }
171       PetscCallMPI(MPI_Barrier(comm));
172       if (rank >= i) {
173         fd = fopen(filename, "r");
174         if (fd) cnt = 1;
175         else cnt = 0;
176         if (fd) {
177           err = fclose(fd);
178           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
179         }
180       } else cnt = 0;
181 
182       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
183       if (rank == i) unlink(filename);
184 
185       if (sum == size) {
186         *shared = PETSC_TRUE;
187         break;
188       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
189     }
190     *tagvalp = (int)*shared;
191     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
192   } else *shared = (PetscBool)*tagvalp;
193   PetscFunctionReturn(0);
194 }
195 
196 /*@C
197   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
198 
199   Collective
200 
201   Input Parameter:
202 . comm - MPI_Communicator that may share working directory
203 
204   Output Parameter:
205 . shared - `PETSC_TRUE` or `PETSC_FALSE`
206 
207   Options Database Keys:
208 + -shared_working_directory - indicates the directory is shared among the MPI ranks
209 - -not_shared_working_directory - indicates the directory is shared among the MPI ranks
210 
211   Environmental Variables:
212 + `PETSC_SHARED_WORKING_DIRECTORY` - indicates the directory is shared among the MPI ranks
213 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is shared among the MPI ranks
214 
215   Level: developer
216 
217   Notes:
218   Stores the status as a MPI attribute so it does not have to be redetermined each time.
219 
220   Assumes that all processors in a communicator either
221 $   1) have a common working directory or
222 $   2) each has a separate working directory
223   eventually we can write a fancier one that determines which processors share a common working directory.
224 
225   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
226 
227 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
228 @*/
229 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared) {
230   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
231   PetscBool          flg, iflg;
232   FILE              *fd;
233   static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID;
234   int                err;
235 
236   PetscFunctionBegin;
237   PetscCallMPI(MPI_Comm_size(comm, &size));
238   if (size == 1) {
239     *shared = PETSC_TRUE;
240     PetscFunctionReturn(0);
241   }
242 
243   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
244   if (flg) {
245     *shared = PETSC_TRUE;
246     PetscFunctionReturn(0);
247   }
248 
249   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
250   if (flg) {
251     *shared = PETSC_FALSE;
252     PetscFunctionReturn(0);
253   }
254 
255   if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_WD_keyval, NULL));
256 
257   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_WD_keyval, (void **)&tagvalp, (int *)&iflg));
258   if (!iflg) {
259     char filename[PETSC_MAX_PATH_LEN];
260 
261     /* This communicator does not yet have a shared  attribute */
262     PetscCall(PetscMalloc1(1, &tagvalp));
263     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_WD_keyval, tagvalp));
264 
265     PetscCall(PetscGetWorkingDirectory(filename, 240));
266     PetscCall(PetscStrcat(filename, "/petsctestshared"));
267     PetscCallMPI(MPI_Comm_rank(comm, &rank));
268 
269     /* each processor creates a  file and all the later ones check */
270     /* this makes sure no subset of processors is shared */
271     *shared = PETSC_FALSE;
272     for (i = 0; i < size - 1; i++) {
273       if (rank == i) {
274         fd = fopen(filename, "w");
275         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
276         err = fclose(fd);
277         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
278       }
279       PetscCallMPI(MPI_Barrier(comm));
280       if (rank >= i) {
281         fd = fopen(filename, "r");
282         if (fd) cnt = 1;
283         else cnt = 0;
284         if (fd) {
285           err = fclose(fd);
286           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
287         }
288       } else cnt = 0;
289 
290       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
291       if (rank == i) unlink(filename);
292 
293       if (sum == size) {
294         *shared = PETSC_TRUE;
295         break;
296       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
297     }
298     *tagvalp = (int)*shared;
299   } else *shared = (PetscBool)*tagvalp;
300   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
301   PetscFunctionReturn(0);
302 }
303 
304 /*@C
305     PetscFileRetrieve - Obtains a file from a URL or compressed
306         and copies into local disk space as uncompressed.
307 
308     Collective
309 
310     Input Parameters:
311 +   comm     - processors accessing the file
312 .   url      - name of file, including entire URL (with or without .gz)
313 -   llen     - length of localname
314 
315     Output Parameters:
316 +   localname - name of local copy of file - valid on only process zero
317 -   found - if found or retrieved the file - valid on all processes
318 
319     Note:
320     if the file already exists local this function just returns without downloading it.
321 
322     Level: intermediate
323 @*/
324 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found) {
325   char        buffer[PETSC_MAX_PATH_LEN], *par, *tlocalname, name[PETSC_MAX_PATH_LEN];
326   FILE       *fp;
327   PetscMPIInt rank;
328   size_t      len = 0;
329   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
330 
331   PetscFunctionBegin;
332   PetscCallMPI(MPI_Comm_rank(comm, &rank));
333   if (rank == 0) {
334     *found = PETSC_FALSE;
335 
336     PetscCall(PetscStrstr(url, ".gz", &par));
337     if (par) {
338       PetscCall(PetscStrlen(par, &len));
339       if (len == 3) compressed = PETSC_TRUE;
340     }
341 
342     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
343     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
344     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
345     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
346     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
347 
348     if (!download && !compressed) {
349       PetscCall(PetscStrncpy(localname, url, llen));
350       PetscCall(PetscTestFile(url, 'r', found));
351       if (*found) {
352         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
353       } else {
354         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
355       }
356       goto done;
357     }
358 
359     /* look for uncompressed file in requested directory */
360     if (compressed) {
361       PetscCall(PetscStrncpy(localname, url, llen));
362       PetscCall(PetscStrstr(localname, ".gz", &par));
363       *par = 0; /* remove .gz extension */
364       PetscCall(PetscTestFile(localname, 'r', found));
365       if (*found) goto done;
366     }
367 
368     /* look for file in current directory */
369     PetscCall(PetscStrrchr(url, '/', &tlocalname));
370     PetscCall(PetscStrncpy(localname, tlocalname, llen));
371     if (compressed) {
372       PetscCall(PetscStrstr(localname, ".gz", &par));
373       *par = 0; /* remove .gz extension */
374     }
375     PetscCall(PetscTestFile(localname, 'r', found));
376     if (*found) goto done;
377 
378     if (download) {
379       /* local file is not already here so use curl to get it */
380       PetscCall(PetscStrncpy(localname, tlocalname, llen));
381       PetscCall(PetscStrcpy(buffer, "curl --fail --silent --show-error "));
382       PetscCall(PetscStrcat(buffer, url));
383       PetscCall(PetscStrcat(buffer, " > "));
384       PetscCall(PetscStrcat(buffer, localname));
385 #if defined(PETSC_HAVE_POPEN)
386       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
387       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
388 #else
389       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
390 #endif
391       PetscCall(PetscTestFile(localname, 'r', found));
392       if (*found) {
393         FILE *fd;
394         char  buf[1024], *str, *substring;
395 
396         /* check if the file didn't exist so it downloaded an HTML message instead */
397         fd = fopen(localname, "r");
398         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
399         str = fgets(buf, sizeof(buf) - 1, fd);
400         while (str) {
401           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
402           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
403           PetscCall(PetscStrstr(buf, "Not Found", &substring));
404           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
405           str = fgets(buf, sizeof(buf) - 1, fd);
406         }
407         fclose(fd);
408       }
409     } else if (compressed) {
410       PetscCall(PetscTestFile(url, 'r', found));
411       if (!*found) goto done;
412       PetscCall(PetscStrncpy(localname, url, llen));
413     }
414     if (compressed) {
415       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
416       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
417       PetscCall(PetscStrstr(name, ".gz", &par));
418       *par = 0; /* remove .gz extension */
419       /* uncompress file */
420       PetscCall(PetscStrcpy(buffer, "gzip -c -d "));
421       PetscCall(PetscStrcat(buffer, localname));
422       PetscCall(PetscStrcat(buffer, " > "));
423       PetscCall(PetscStrcat(buffer, name));
424 #if defined(PETSC_HAVE_POPEN)
425       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
426       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
427 #else
428       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
429 #endif
430       PetscCall(PetscStrncpy(localname, name, llen));
431       PetscCall(PetscTestFile(localname, 'r', found));
432     }
433   }
434 done:
435   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
436   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
437   PetscFunctionReturn(0);
438 }
439