NCBI C++ ToolKit
ncbi_ftp_connector.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef CONNECT___NCBI_FTP_CONNECTOR__H
2 #define CONNECT___NCBI_FTP_CONNECTOR__H
3 
4 /* $Id: ncbi_ftp_connector.h 94307 2021-07-19 17:25:18Z lavr $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Anton Lavrentiev
30  *
31  * File Description:
32  * FTP CONNECTOR implements FTP client side API
33  *
34  * See <connect/ncbi_connector.h> for the detailed specification of
35  * the connector's methods and structures.
36  *
37  */
38 
39 #include <connect/ncbi_connutil.h>
40 
41 
42 /** @addtogroup Connectors
43  *
44  * @{
45  */
46 
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 
/*
 * FTP connector behavior flags.  Individual bits are combined with
 * bitwise OR and passed around as TFTP_Flags.
 *
 * NOTE(review): fFTP_LogControl and fFTP_LogAll were absent from this copy
 * of the enum, although both names are referenced elsewhere for this header.
 * They are reinstated here with values inferred from the neighbouring
 * entries (0x1 precedes fFTP_LogData = 0x2; "All" as the union of both
 * logging bits) -- confirm against the upstream header.
 */
enum EFTP_Flag {
    fFTP_LogControl   = 0x1,    /* presumably: log the control connection    */
    fFTP_LogData      = 0x2,    /* presumably: log the data connection       */
    fFTP_LogAll       = fFTP_LogControl | fFTP_LogData,
    fFTP_UseFeatures  = 0x4,    /* parse FEAT to check for available FTP feats*/
    fFTP_NotifySize   = 0x8,    /* use C/B to communicate file size to user  */
    fFTP_UsePassive   = 0x10,   /* use only passive mode for data connection */
    fFTP_UseActive    = 0x20,   /* use only active  mode for data connection */
    fFTP_UseTypeL8    = 0x40,   /* use "TYPE L8" instead of "TYPE I" for data*/
    fFTP_UncleanIAC   = 0x80,   /* do not escape IAC('\377') in pathnames    */
    fFTP_IgnorePath   = 0x100,  /* do not auto-chdir(net_info->path) at login*/
    fFTP_UncorkUpload = 0x200,  /* do not use TCP_CORK for uploads (poor perf)*/
    fFTP_NoSizeChecks = 0x400,  /* do not check sizes of data transfers      */
    fFTP_NoExtensions = 0x800,  /* do not use EPSV/EPRT protocol extensions  */
    fFTP_DelayRestart = 0x1000, /* delay RESTart until an actual xfer command*/
    fFTP_UseProxy     = 0x2000  /* use proxy settings to establish connections*/
};
typedef unsigned int TFTP_Flags; /* bitwise OR of EFTP_Flag */
71 
72 
73 /*
74  * I M P L E M E N T A T I O N D E T A I L S
75  *
76  * FTP protocol implies the use of both a control connection (to pass commands
77  * and responses) and a data connection (to pass file contents), so is a 4-way
78  * communication scheme.
79  *
80  * CONN API supports only a two way scheme, which is why for FTP a special
81  * discipline is required to communicate with the server.
82  *
83  * User code interacts with FTP server by means of writing commands (most of
84  * which are direct FTP commands, see below), and either reading responses or
85  * file contents. There is a special stop command to clear up any pending
86  * command/action in progress.
87  *
88  * Table below describes each command and what the user code is expected to do.
89  * Note that commands unsupported by the server are automatically unsupported
90  * by the connector.
91  *
92  * Upon open, the FTP server gets connected to with the specified username and
93  * password (user accounts [ACCT] are not supported), and the data transfer is
94  * set to be STREAM/FILE/BINARY(I or L8) -- these transfer parameters may not
95  * be changed.
96  *
97  * Any disruption in control connection with the server renders FTP connection
98  * bad and unusable. There is no automatic recovery (other than a full restart
99  * of the connection) provided by the implementation.
100  *
101  * USER COMMAND(write) ACTION(server) OUTPUT(text to read on success)
102  *
103  * REN f1 f2 Rename file f1 to f2 250
104  * CWD<SP>d Change directory to d 250
105  * PWD Get current directory Name of current directory
106  * MKD<SP>d Create directory d Name of created directory
107  * RMD<SP>d Delete directory d 250
108  * CDUP Go one dir level up 200
109  * SYST Get system info Single-line system info
110  * STAT[<SP>f] Get status[of file f] Server response as received
111  * SIZE<SP>f Get size of file f Size of the file (numeric str)
112  * MDTM<SP>f Get time of file f File time (UTC secs since epoch)
113  * DELE<SP>f Delete file f 250
114  * REST<SP>offset Offset the transfer 350
115  * LIST[<SP>d] List curdir[or dir d] Full directory listing
116  * NLST[<SP>d] Short list as in LIST Short dirlist (filenames only)
117  * RETR<SP>f Retrieve file f File contents
118  * MLSD[<SP>d] List curdir[or dir d] Mach-readable directory listing
119  * MLST[<SP>p] Facts of curdir[or p] Mach-readable path facts
120  * FEAT FEAT command FEAT list as returned by server
121  * OPTS<SP>opts OPTS command OPTS response as received
122  * NOOP NOOP command (*) <EOF>
123  * STOR<SP>f Store file f on server
124  * APPE<SP>f Append/create file f
125  * <empty> See NOOP
126  *
127  * All commands above must be terminated with LF('\n') to be executed
128  * (otherwise, successive writes are causing the command to continue to
129  * accumulate in the internal command buffer). Only one command can be
130  * executed at a time (e.g. writing something like "CDUP\nSYST..." in a single
131  * write is illegal). Note that the codes are text strings each consisting of
132  * 3 chars (not binary integers!) -- the "values" are chosen to be equivalent
133  * to FTP response codes that the FTP servers are expected to generate upon
134  * successful completion of the corresponding commands (per RFC959), but may
135  * not necessarily be the actual codes as received from the server (connector
136  * is somewhat flexible with accepting various codes noted in several different
137  * implementation of FTP servers). Just "\n" on its own encodes an empty
138  * string command, and is treated just the same as "NOOP\n".
139  *
140  * <SP> denotes exactly one space character, a blank means any number of space
141  * or tab characters. Single filenames(f), directories(d), and paths(p) span
142  * up to the end of the command ('\n'), and do not require any quoting for
143  * special characters. Exception is the REN command, which takes two names,
144  * f1 and f2, each being either a single token (no leading '"' and embedded
145  * spaces / tabs), or quoted FTP-style (enclosed in double quotes, with any
146  * embedded double quote character doubled, e.g. """a""b" encodes the file
147  * name "a"b). Note that the filename a"b (no leading quote) does not require
148  * any additional quoting (but it still may be used as "a""b").
149  *
150  * Some commands (e.g. NLST, MLSD, etc) allow an optional argument, which can
151  * be either present or omitted (the optional part is shown in the square
152  * brackets, which are not the elements of those commands). The UTC seconds can
153  * have a fraction portion preceded by a decimal point.
154  *
155  * Current implementation forbids file names to contain '\0', '\r', or '\n'
156  * (even though FTP takes special precautions how to deal with such names).
157  *
158  * (*) Note that any subsequent command aborts any unfinished command (if any
159  * in progress), so even "NOOP" executed in that context can cause the server
160  * to terminate the current transfer (see below).
161  *
162  * Normally, FTP connection operates in READ mode: commands are written and
163  * responses are read. In this mode the connection consumes any command, but
164  * those invalid, unrecognized, or rejected by the server will cause
165  * CONN_Status(eIO_Write) to return non-eIO_Success. Note that since normally
166  * CONN_Write() returns eIO_Success when at least one byte of data has been
167  * consumed, its return code is basically useless to distinguish the command
168  * completion status. Instead of terminating commands with '\n', CONN_Flush()
169  * can be used, and its return code will be the true status of how the command
170  * was done. Alternatively, CONN_Wait(eIO_Read) can cause a similar effect,
171  * and finally, a read from FTP connection that operates in READ mode causes a
172  * pending command to be executed (even if the connection was created untied,
173  * the additional flushing is done internally).
174  *
175  * When a RETR/LIST/NLST/MLSD command gets executed, all subsequent reads from
176  * the connection will retrieve the contents of the file or directory (until
177  * eIO_Closed). If the connection returns eIO_Closed right away, it means that
178  * either the file/directory does not exist, or RETR was attempted on a
179  * directory, or finally, the requested file/directory is empty. The first two
180  * cases would cause CONN_Status(eIO_Write) to return a code different from
181  * eIO_Success; and eIO_Success would only result in the case of an empty
182  * source.
183  *
184  * File size will be checked by the connector to see whether the download (or
185  * upload, see below) was complete (sometimes, the information returned from
186  * the server does not allow to perform this check). Any mismatch will result
187  * in an error different from eIO_Closed. (For buggy / noisy FTP servers, the
188  * size checks can be suppressed via the connector flags.)
189  *
190  * During file download, any command (legitimate or not) written to the
191  * connection and triggered for execution will abort the data transfer (results
192  * in a warning logged, yet the connection must still be manually drained until
193  * eIO_Closed), but if an output is expected from such a command, it cannot be
194  * distinguished from the remnants of the file data -- so such a method is not
195  * very robust.
196  *
197  * There is a special empty command ("\n") that can be written to abort the
198  * transfer: it gets converted to "NOOP" internally, produces no output (just
199  * inserts eIO_Closed in data), and since it is a legitimate command, it
200  * usually results in eIO_Success when inquired for write status (the result
201  * may be different on a rare occasion if the server has chosen to drop the
202  * control connection, for example). Still, to be usable again the connection
203  * must be drained out until eIO_Closed is received by reading.
204  *
205  * Note that for commands, which return text codes, it is allowed not to read
206  * the codes out, but rely solely on CONN_Status() responses. Any pending
207  * (unread) result of the previous command gets discarded when a new command
208  * gets executed (i.e. command accumulation in the internal buffer does not
209  * cause the pending result to be discarded; it is the connection flushing, as
210  * with '\n', CONN_Flush(), etc that does so). (Same happens with results of
211  * the commands returning non-code information, but reading it out is supposed
212  * to be the very purpose of issuing of such commands, and hence, is not
213  * mentioned above.)
214  *
215  * Connection is switched to SEND mode upon either APPE or STOR gets executed.
216  * If that is successful (CONN_Status(eIO_Write) reports eIO_Success), then
217  * any following writes will send the data to the file being uploaded (while
218  * the file is being uploaded, CONN_Status(eIO_Write) will report the status of
219  * the last write operation to the file). Should an error occur, eIO_Closed
220  * would result, and the connection would not accept any more writes until it
221  * is read. Similarly, when an upload is about to finish, the connection must
222  * be read to finalize the transfer. The result of the read will be a string
223  * representing the size of the uploaded file data as a sequence of decimal
224  * digits (or an empty read in case of an upload error). Once all digits are
225  * consumed (eIO_Closed seen) the connection returns to READ mode.
226  * CONN_Wait(eIO_Read) will also cause the upload to finalize -- still the data
227  * size is expected to be extracted (or discarded if another command follows).
228  *
229  * Unfinalized uploads (such as when connection gets closed before the final
230  * read) get reported to the log, and also make CONN_Close() return an
231  * error. Note that unlike file download (which occurs in READ mode), it is
232  * impossible to abort an upload by writing any FTP commands (since writing in
233  * SEND mode goes to file), but it is reading that will cause the cancellation.
234  * So if a connection is in undetermined state, the recovery would be to do a
235  * small quick read (e.g. for just 1 byte with a small timeout), then write the
236  * NOOP command and cause an execution (e.g. writing just "\n" does that), then
237  * drain the connection by reading again until eIO_Closed.
238  *
239  * Both downloads and uploads (but not file lists!) support restart mode (if
240  * the server permits so). The standard guarantees that the REST command
241  * remains in effect only until any subsequent command (which is supposed to be
242  * either RETR or STOR), and that servers might lose the restart position,
243  * otherwise. However, many implementations allow to open a data connection in
244  * the interim. Since the FTP connector opens data connection only upon
245  * receiving a data transfer command from the user, it thus can clobber the
246  * preceding REST for the servers that do not allow the extra activity. For
247  * those, the REST command can be delayed for issuance until right before the
248  * data transfer is about to begin (see flags). In this case, a write of such
249  * command does not result in the "350" response on read (still,
250  * CONN_Write()/CONN_Flush()/CONN_Status() will all be reported as successful
251  * if the command was properly understood by the connector).
252  *
253  * The connector drops any restart position, which remains for longer than
254  * the next user command (so the restart position will not be accidentally
255  * taken into account for any further transfer size verifications). Note
256  * that only successful transfers are said to reset the restart position back
257  * to 0 at the server end (failed ones might not do so), which is why it is a
258  * sole responsibility of the user code to maintain/drop the restart position
259  * on the server by issuing the REST commands explicitly, as appropriate.
260  * Note that "REST 0" issued by the user code never gets delayed, and relayed
261  * immediately to the server (with the result code "350" available for read
262  * if succeeded on the server end).
263  *
264  * The supplement mode of CONN API can make use of FTP connection much easier:
265  * instead of checking for CONN_Status(), direct return codes of read/write
266  * operations can be used. Care must be taken to interpret eIO_Closed that may
267  * result from read operations (such as when extracting a numeric string of
268  * command completion that is immediately followed by the response boundary
269  * denoted as eIO_Closed).
270  *
271  * To make the code robust, it is always advised to process the transfer byte
272  * count first, and then to proceed with the return status analysis.
273  */
274 
275 /* Even though many FTP server implementations provide SIZE command these days,
276  * some FTPDs still lack this feature and can post the file size only when the
277  * actual download starts. For them, and for connections that do not want to
278  * get the size inserted into the data stream (which is the default behavior
279  * upon a successful SIZE command), the following callback is provided as an
280  * alternative solution.
281  * The callback gets activated when downloads start, and also upon successful
282  * SIZE commands (without causing the file size to appear in the connection
283  * data as it usually would otherwise) but the latter is only if
284  * fFTP_NotifySize has been set in the "flag" parameter of FTP connector
285  * constructors (below).
286  * Each time the size gets passed to the callback as a '\0'-terminated
287  * character string.
288  * The callback remains effective for the entire lifetime of the connector.
289  * In its "cmd" argument, the callback also gets a copy of the FTP command
290  * that triggered it, and for compatibility with future extensions, the user
291  * code is expected to check, which command it is processing, before proceeding
292  * with the "arg" parameter (thus skipping unexpected commands, and returning
293  * eIO_Success). Return code non-eIO_Success causes the command to terminate
294  * with an error, with the code returned "as-is" from a CONN call.
295  *
296  * NOTE: With restarted data retrievals (REST) the size reported by the server
297  * in response to transfer initiation can be either the true size of the data
298  * to be received or the entire size of the original file (without the restart
299  * offset taken into account), and the latter should be considered as a bug.
300  */
301 typedef EIO_Status (*FFTP_Callback)(void* data,
302  const char* cmd, const char* arg);
303 typedef struct {
304  FFTP_Callback func; /* to call upon certain FTP commands */
305  void* data; /* to supply as a first callback parameter */
306 } SFTP_Callback;
307 
308 
309 /* Create new CONNECTOR structure to handle FTP transfers,
310  * both download and upload. Return NULL on error.
311  */
313 (const char* host, /* hostname, required */
314  unsigned short port, /* port #, 21 [standard] if 0 passed here */
315  const char* user, /* username, "ftp" [==anonymous] by default */
316  const char* pass, /* password, "none" by default */
317  const char* path, /* initial directory to "chdir" to on server */
318  TFTP_Flags flag, /* mostly for logging socket data [optional] */
319  const SFTP_Callback* cmcb /* command callback [optional] */
320 );
321 
322 
323 /* Same as above but use fields provided by the connection structure.
324  * Note: info->timeout is only used for tunneling, not for FTP xfers */
326 (const SConnNetInfo* info, /* all connection params including HTTP proxy */
327  TFTP_Flags flag, /* mostly for logging socket data [optional] */
328  const SFTP_Callback* cmcb /* command callback [optional] */
329 );
330 
331 
332 #ifdef __cplusplus
333 } /* extern "C" */
334 #endif
335 
336 
337 /* @} */
338 
339 #endif /* CONNECT___NCBI_FTP_CONNECTOR__H */
static CS_COMMAND * cmd
Definition: ct_dynamic.c:26
char data[12]
Definition: iconv.c:80
CONNECTOR FTP_CreateConnectorSimple(const char *host, unsigned short port, const char *user, const char *pass, const char *path, TFTP_Flags flag, const SFTP_Callback *cmcb)
EIO_Status(* FFTP_Callback)(void *data, const char *cmd, const char *arg)
CONNECTOR FTP_CreateConnector(const SConnNetInfo *info, TFTP_Flags flag, const SFTP_Callback *cmcb)
FFTP_Callback func
unsigned int TFTP_Flags
@ fFTP_UsePassive
@ fFTP_UseProxy
@ fFTP_UseTypeL8
@ fFTP_UncorkUpload
@ fFTP_UseActive
@ fFTP_UncleanIAC
@ fFTP_LogAll
@ fFTP_UseFeatures
@ fFTP_NoExtensions
@ fFTP_NotifySize
@ fFTP_IgnorePath
@ fFTP_LogData
@ fFTP_DelayRestart
@ fFTP_LogControl
@ fFTP_NoSizeChecks
EIO_Status
I/O status.
Definition: ncbi_core.h:132
#define NCBI_XCONNECT_EXPORT
static MDB_envinfo info
Definition: mdb_load.c:37
Connector specification.
Modified on Fri Sep 20 14:57:50 2024 by modify_doxy.py rev. 669887