import libfetch-2.38 from NetBSD

ftp://ftp.fu-berlin.de/unix/NetBSD/packages/current-src/pkgsrc/net/libfetch/files

libfetch comes (at least) in netbsd and freebsd flavors which
differing functionality. Alpine and Arch package netbsd one,
but it's not widely packaged across other distributions.

We need NetBSD version as it does not use funopen(3) which is not
supported in musl, and supports connection pooling.

FreeBSD seems to be the orignal and better maintained version
with support for SSL CAs, client certificate authentication,
proxy authentication, and improved http redirect handling.

So this imports NetBSD version, and future commits will pick up
the needed improvements from FreeBSD tree.

Incidentally, this also fixes #7857 and likes for good.
cute-signatures
Timo Teräs 2017-10-05 08:57:18 +03:00
parent 8f0938a056
commit 9dc6278c7b
13 changed files with 6630 additions and 0 deletions

45
libfetch/Makefile Normal file
View File

@ -0,0 +1,45 @@
# $NetBSD: Makefile,v 1.8 2016/10/27 10:05:38 joerg Exp $
LIB= fetch
SRCS= fetch.c common.c ftp.c http.c file.c
DPSRCS= ftperr.h httperr.h
INCS= fetch.h
MAN= fetch.3
CLEANFILES= ftperr.h httperr.h
MKLINT= no
MKPIC= no
MKPROFILE= no
.include <bsd.own.mk>
CPPFLAGS+= -I.
CPPFLAGS+= -D_LARGEFILE_SOURCE -D_LARGE_FILES -D_FILE_OFFSET_BITS=64
FETCH_WITH_INET6?= no
FETCH_WITH_OPENSSL?= no
.if !empty(FETCH_WITH_INET6:M[yY][eE][sS])
CPPFLAGS+= -DINET6
.endif
.if !empty(FETCH_WITH_OPENSSL:M[yY][eE][sS])
CPPFLAGS+= -DWITH_SSL
LDADD= -lssl -lcrypto
.endif
CPPFLAGS+= -DFTP_COMBINE_CWDS
WARNS?= 4
ftp.o: ftperr.h
http.o: httperr.h
ftperr.h: ${.CURDIR}/ftp.errors ${.CURDIR}/Makefile ${.CURDIR}/errlist.sh
sh ${.CURDIR}/errlist.sh ftp_errlist FTP \
${.CURDIR}/ftp.errors > ${.TARGET}
httperr.h: ${.CURDIR}/http.errors ${.CURDIR}/Makefile ${.CURDIR}/errlist.sh
sh ${.CURDIR}/errlist.sh http_errlist HTTP \
${.CURDIR}/http.errors > ${.TARGET}
.include <bsd.lib.mk>

1085
libfetch/common.c Normal file

File diff suppressed because it is too large Load Diff

147
libfetch/common.h Normal file
View File

@ -0,0 +1,147 @@
/* $NetBSD: common.h,v 1.24 2016/10/20 21:25:57 joerg Exp $ */
/*-
* Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD: common.h,v 1.30 2007/12/18 11:03:07 des Exp $
*/
#ifndef _COMMON_H_INCLUDED
#define _COMMON_H_INCLUDED
#define FTP_DEFAULT_PORT 21
#define HTTP_DEFAULT_PORT 80
#define FTP_DEFAULT_PROXY_PORT 21
#define HTTP_DEFAULT_PROXY_PORT 3128
#ifdef WITH_SSL
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/pem.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#endif
#if defined(__GNUC__) && __GNUC__ >= 3
#define LIBFETCH_PRINTFLIKE(fmtarg, firstvararg) \
__attribute__((__format__ (__printf__, fmtarg, firstvararg)))
#else
#define LIBFETCH_PRINTFLIKE(fmtarg, firstvararg)
#endif
#if !defined(__sun) && !defined(__hpux) && !defined(__INTERIX) && \
!defined(__digital__) && !defined(__linux) && !defined(__MINT__) && \
!defined(__sgi) && !defined(__minix) && !defined(__CYGWIN__)
#define HAVE_SA_LEN
#endif
/* Connection */
typedef struct fetchconn conn_t;
struct fetchconn {
int sd; /* socket descriptor */
char *buf; /* buffer */
size_t bufsize; /* buffer size */
size_t buflen; /* length of buffer contents */
int buf_events; /* poll flags for the next cycle */
char *next_buf; /* pending buffer, e.g. after getln */
size_t next_len; /* size of pending buffer */
int err; /* last protocol reply code */
#ifdef WITH_SSL
SSL *ssl; /* SSL handle */
SSL_CTX *ssl_ctx; /* SSL context */
X509 *ssl_cert; /* server certificate */
# if OPENSSL_VERSION_NUMBER < 0x00909000L
SSL_METHOD *ssl_meth; /* SSL method */
# else
const SSL_METHOD *ssl_meth; /* SSL method */
# endif
#endif
char *ftp_home;
struct url *cache_url;
int cache_af;
int (*cache_close)(conn_t *);
conn_t *next_cached;
};
/* Structure used for error message lists */
struct fetcherr {
const int num;
const int cat;
const char *string;
};
void fetch_seterr(struct fetcherr *, int);
void fetch_syserr(void);
void fetch_info(const char *, ...) LIBFETCH_PRINTFLIKE(1, 2);
int fetch_default_port(const char *);
int fetch_default_proxy_port(const char *);
int fetch_bind(int, int, const char *);
conn_t *fetch_cache_get(const struct url *, int);
void fetch_cache_put(conn_t *, int (*)(conn_t *));
conn_t *fetch_connect(struct url *, int, int);
conn_t *fetch_reopen(int);
int fetch_ssl(conn_t *, const struct url *, int);
ssize_t fetch_read(conn_t *, char *, size_t);
int fetch_getln(conn_t *);
ssize_t fetch_write(conn_t *, const void *, size_t);
int fetch_close(conn_t *);
int fetch_add_entry(struct url_list *, struct url *, const char *, int);
int fetch_netrc_auth(struct url *url);
int fetch_no_proxy_match(const char *);
int fetch_urlpath_safe(char);
#define ftp_seterr(n) fetch_seterr(ftp_errlist, n)
#define http_seterr(n) fetch_seterr(http_errlist, n)
#define netdb_seterr(n) fetch_seterr(netdb_errlist, n)
#define url_seterr(n) fetch_seterr(url_errlist, n)
fetchIO *fetchIO_unopen(void *, ssize_t (*)(void *, void *, size_t),
ssize_t (*)(void *, const void *, size_t), void (*)(void *));
/*
* I don't really like exporting http_request() and ftp_request(),
* but the HTTP and FTP code occasionally needs to cross-call
* eachother, and this saves me from adding a lot of special-case code
* to handle those cases.
*
* Note that _*_request() free purl, which is way ugly but saves us a
* whole lot of trouble.
*/
fetchIO *http_request(struct url *, const char *,
struct url_stat *, struct url *, const char *);
fetchIO *ftp_request(struct url *, const char *, const char *,
struct url_stat *, struct url *, const char *);
/*
* Check whether a particular flag is set
*/
#define CHECK_FLAG(x) (flags && strchr(flags, (x)))
#endif

11
libfetch/errlist.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/sh
# $NetBSD: errlist.sh,v 1.2 2008/10/06 12:58:29 joerg Exp $
printf "static struct fetcherr $1[] = {\n"
while read code type msg; do
[ "${code}" = "#" ] && continue
printf "\t{ ${code}, FETCH_${type}, \"${msg}\" },\n"
done < $3
printf "\t{ -1, FETCH_UNKNOWN, \"Unknown $2 error\" }\n"
printf "};\n"

781
libfetch/fetch.3 Normal file
View File

@ -0,0 +1,781 @@
.\"-
.\" Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
.\" Copyright (c) 2010 Joerg Sonnenberger <joerg@NetBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD: fetch.3,v 1.64 2007/12/18 11:03:26 des Exp $
.\" $NetBSD: fetch.3,v 1.17 2016/05/31 18:02:36 abhinav Exp $
.\"
.Dd January 22, 2010
.Dt FETCH 3
.Os
.Sh NAME
.Nm fetchMakeURL ,
.Nm fetchParseURL ,
.Nm fetchCopyURL ,
.Nm fetchFreeURL ,
.Nm fetchXGetURL ,
.Nm fetchGetURL ,
.Nm fetchPutURL ,
.Nm fetchStatURL ,
.Nm fetchListURL ,
.Nm fetchXGet ,
.Nm fetchGet ,
.Nm fetchPut ,
.Nm fetchStat ,
.Nm fetchList ,
.Nm fetchXGetFile ,
.Nm fetchGetFile ,
.Nm fetchPutFile ,
.Nm fetchStatFile ,
.Nm fetchListFile ,
.Nm fetchXGetHTTP ,
.Nm fetchGetHTTP ,
.Nm fetchPutHTTP ,
.Nm fetchStatHTTP ,
.Nm fetchListHTTP ,
.Nm fetchXGetFTP ,
.Nm fetchGetFTP ,
.Nm fetchPutFTP ,
.Nm fetchStatFTP ,
.Nm fetchListFTP
.Nm fetchInitURLList ,
.Nm fetchFreeURLList ,
.Nm fetchUnquotePath ,
.Nm fetchUnquoteFilename ,
.Nm fetchStringifyURL ,
.Nm fetchConnectionCacheInit ,
.Nm fetchConnectionCacheClose ,
.Nm fetch
.Nd file transfer functions
.Sh LIBRARY
.Lb libfetch
.Sh SYNOPSIS
.In stdio.h
.In fetch.h
.Ft struct url *
.Fn fetchMakeURL "const char *scheme" "const char *host" "int port" "const char *doc" "const char *user" "const char *pwd"
.Ft struct url *
.Fn fetchParseURL "const char *URL"
.Ft struct url *
.Fn fetchCopyURL "const struct url *u"
.Ft void
.Fn fetchFreeURL "struct url *u"
.Ft fetchIO *
.Fn fetchXGetURL "const char *URL" "struct url_stat *us" "const char *flags"
.Ft fetchIO *
.Fn fetchGetURL "const char *URL" "const char *flags"
.Ft fetchIO *
.Fn fetchPutURL "const char *URL" "const char *flags"
.Ft int
.Fn fetchStatURL "const char *URL" "struct url_stat *us" "const char *flags"
.Ft int
.Fn fetchListURL "struct url_list *list" "const char *URL" "const char *pattern" "const char *flags"
.Ft fetchIO *
.Fn fetchXGet "struct url *u" "struct url_stat *us" "const char *flags"
.Ft fetchIO *
.Fn fetchGet "struct url *u" "const char *flags"
.Ft fetchIO *
.Fn fetchPut "struct url *u" "const char *flags"
.Ft int
.Fn fetchStat "struct url *u" "struct url_stat *us" "const char *flags"
.Ft int
.Fn fetchList "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags"
.Ft fetchIO *
.Fn fetchXGetFile "struct url *u" "struct url_stat *us" "const char *flags"
.Ft fetchIO *
.Fn fetchGetFile "struct url *u" "const char *flags"
.Ft fetchIO *
.Fn fetchPutFile "struct url *u" "const char *flags"
.Ft int
.Fn fetchStatFile "struct url *u" "struct url_stat *us" "const char *flags"
.Ft int
.Fn fetchListFile "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags"
.Ft fetchIO *
.Fn fetchXGetHTTP "struct url *u" "struct url_stat *us" "const char *flags"
.Ft fetchIO *
.Fn fetchGetHTTP "struct url *u" "const char *flags"
.Ft fetchIO *
.Fn fetchPutHTTP "struct url *u" "const char *flags"
.Ft int
.Fn fetchStatHTTP "struct url *u" "struct url_stat *us" "const char *flags"
.Ft int
.Fn fetchListHTTP "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags"
.Ft fetchIO *
.Fn fetchXGetFTP "struct url *u" "struct url_stat *us" "const char *flags"
.Ft fetchIO *
.Fn fetchGetFTP "struct url *u" "const char *flags"
.Ft fetchIO *
.Fn fetchPutFTP "struct url *u" "const char *flags"
.Ft int
.Fn fetchStatFTP "struct url *u" "struct url_stat *us" "const char *flags"
.Ft int
.Fn fetchListFTP "struct url_list *list" "struct url *u" "const char *pattern" "const char *flags"
.Ft void
.Fn fetchInitURLList "struct url_list *ul"
.Ft int
.Fn fetchAppendURLList "struct url_list *dst" "const struct url_list *src"
.Ft void
.Fn fetchFreeURLList "struct url_list *ul"
.Ft char *
.Fn fetchUnquotePath "struct url *u"
.Ft char *
.Fn fetchUnquoteFilename "struct url *u"
.Ft char *
.Fn fetchStringifyURL "const struct url *u"
.Ft void
.Fn fetchConnectionCacheInit "int global" "int per_host"
.Ft void
.Fn fetchConnectionCacheClose "void"
.Sh DESCRIPTION
These functions implement a high-level library for retrieving and
uploading files using Uniform Resource Locators (URLs).
.Pp
.Fn fetchParseURL
takes a URL in the form of a null-terminated string and splits it into
its components function according to the Common Internet Scheme Syntax
detailed in RFC 1738.
A regular expression which produces this syntax is:
.Bd -literal -offset indent
\*[Lt]scheme\*[Gt]:(//(\*[Lt]user\*[Gt](:\*[Lt]pwd\*[Gt])?@)?\*[Lt]host\*[Gt](:\*[Lt]port\*[Gt])?)?/(\*[Lt]doc\*[Gt])?
.Ed
.Pp
If the URL does not seem to begin with a scheme name, it is assumed to be a local path.
Only absolute path names are accepted.
.Pp
Note that some components of the URL are not necessarily relevant to
all URL schemes.
For instance, the file scheme only needs the
.Aq scheme
and
.Aq doc
components.
.Fn fetchParseURL
quotes any unsafe character in the URL automatically.
This is not done by
.Fn fetchMakeURL .
.Fn fetchCopyURL
copies an existing
.Vt url
structure.
.Pp
.Fn fetchMakeURL ,
.Fn fetchParseURL ,
and
.Fn fetchCopyURL
return a pointer to a
.Vt url
structure, which is defined as follows in
.In fetch.h :
.Bd -literal
#define URL_SCHEMELEN 16
#define URL_USERLEN 256
#define URL_PWDLEN 256
#define URL_HOSTLEN 255
struct url {
char scheme[URL_SCHEMELEN + 1];
char user[URL_USERLEN + 1];
char pwd[URL_PWDLEN + 1];
char host[URL_HOSTLEN + 1];
int port;
char *doc;
off_t offset;
size_t length;
time_t last_modified;
};
.Ed
.Pp
The pointer returned by
.Fn fetchMakeURL ,
.Fn fetchCopyURL ,
and
.Fn fetchParseURL
should be freed using
.Fn fetchFreeURL .
The size of
.Vt struct URL
is not part of the ABI.
.Pp
.Fn fetchXGetURL ,
.Fn fetchGetURL ,
and
.Fn fetchPutURL
constitute the recommended interface to the
.Nm fetch
library.
They examine the URL passed to them to determine the transfer
method, and call the appropriate lower-level functions to perform the
actual transfer.
.Fn fetchXGetURL
also returns the remote document's metadata in the
.Vt url_stat
structure pointed to by the
.Fa us
argument.
.Pp
The
.Fa flags
argument is a string of characters which specify transfer options.
The
meaning of the individual flags is scheme-dependent, and is detailed
in the appropriate section below.
.Pp
.Fn fetchStatURL
attempts to obtain the requested document's metadata and fill in the
structure pointed to by its second argument.
The
.Vt url_stat
structure is defined as follows in
.In fetch.h :
.Bd -literal
struct url_stat {
off_t size;
time_t atime;
time_t mtime;
};
.Ed
.Pp
If the size could not be obtained from the server, the
.Fa size
field is set to \-1.
If the modification time could not be obtained from the server, the
.Fa mtime
field is set to the epoch.
If the access time could not be obtained from the server, the
.Fa atime
field is set to the modification time.
.Pp
.Fn fetchListURL
attempts to list the contents of the directory pointed to by the URL provided.
The pattern can be a simple glob-like expression as hint.
Callers should not depend on the server to filter names.
If successful, it appends the list of entries to the
.Vt url_list
structure.
The
.Vt url_list
structure is defined as follows in
.In fetch.h :
.Bd -literal
struct url_list {
size_t length;
size_t alloc_size;
struct url *urls;
};
.Ed
.Pp
The list should be initialized by calling
.Fn fetchInitURLList
and the entries be freed by calling
.Fn fetchFreeURLList .
The function
.Fn fetchAppendURLList
can be used to append one URL lists to another.
If the
.Ql c
(cache result) flag is specified, the library is allowed to internally
cache the result.
.Pp
.Fn fetchStringifyURL
returns the URL as string.
.Fn fetchUnquotePath
returns the path name part of the URL with any quoting undone.
Query arguments and fragment identifiers are not included.
.Fn fetchUnquoteFilename
returns the last component of the path name as returned by
.Fn fetchUnquotePath .
.Fn fetchStringifyURL ,
.Fn fetchUnquotePath ,
and
.Fn fetchUnquoteFilename
return a string that should be deallocated with
.Fn free
after use.
.Pp
.Fn fetchConnectionCacheInit
enables the connection cache.
The first argument specifies the global limit on cached connections.
The second argument specifies the host limit.
Entries are considered to specify the same host, if the host name
from the URL is identical, indepent of the address or address family.
.Fn fetchConnectionCacheClose
flushed the connection cache and closes all cached connections.
.Pp
.Fn fetchXGet ,
.Fn fetchGet ,
.Fn fetchPut ,
and
.Fn fetchStat
are similar to
.Fn fetchXGetURL ,
.Fn fetchGetURL ,
.Fn fetchPutURL ,
and
.Fn fetchStatURL ,
except that they expect a pre-parsed URL in the form of a pointer to
a
.Vt struct url
rather than a string.
.Pp
All of the
.Fn fetchXGetXXX ,
.Fn fetchGetXXX ,
and
.Fn fetchPutXXX
functions return a pointer to a stream which can be used to read or
write data from or to the requested document, respectively.
Note that
although the implementation details of the individual access methods
vary, it can generally be assumed that a stream returned by one of the
.Fn fetchXGetXXX
or
.Fn fetchGetXXX
functions is read-only, and that a stream returned by one of the
.Fn fetchPutXXX
functions is write-only.
.Sh PROTOCOL INDEPENDENT FLAGS
If the
.Ql i
(if-modified-since) flag is specified, the library will try to fetch
the content only if it is newer than
.Va last_modified .
For HTTP an
.Li If-Modified-Since
HTTP header is sent.
For FTP a
.Li MTDM
command is sent first and compared locally.
For FILE the source file is compared.
.Sh FILE SCHEME
.Fn fetchXGetFile ,
.Fn fetchGetFile ,
and
.Fn fetchPutFile
provide access to documents which are files in a locally mounted file
system.
Only the
.Aq document
component of the URL is used.
.Pp
.Fn fetchXGetFile
and
.Fn fetchGetFile
do not accept any flags.
.Pp
.Fn fetchPutFile
accepts the
.Ql a
(append to file) flag.
If that flag is specified, the data written to
the stream returned by
.Fn fetchPutFile
will be appended to the previous contents of the file, instead of
replacing them.
.Sh FTP SCHEME
.Fn fetchXGetFTP ,
.Fn fetchGetFTP ,
and
.Fn fetchPutFTP
implement the FTP protocol as described in RFC 959.
.Pp
By default
.Nm libfetch
will attempt to use passive mode first and only fallback to active mode
if the server reports a syntax error.
If the
.Ql a
(active) flag is specified, a passive connection is not tried and active mode
is used directly.
.Pp
If the
.Ql l
(low) flag is specified, data sockets will be allocated in the low (or
default) port range instead of the high port range (see
.Xr ip 4 ) .
.Pp
If the
.Ql d
(direct) flag is specified,
.Fn fetchXGetFTP ,
.Fn fetchGetFTP ,
and
.Fn fetchPutFTP
will use a direct connection even if a proxy server is defined.
.Pp
If no user name or password is given, the
.Nm fetch
library will attempt an anonymous login, with user name "anonymous"
and password "anonymous@\*[Lt]hostname\*[Gt]".
.Sh HTTP SCHEME
The
.Fn fetchXGetHTTP ,
.Fn fetchGetHTTP ,
and
.Fn fetchPutHTTP
functions implement the HTTP/1.1 protocol.
With a little luck, there is
even a chance that they comply with RFC 2616 and RFC 2617.
.Pp
If the
.Ql d
(direct) flag is specified,
.Fn fetchXGetHTTP ,
.Fn fetchGetHTTP ,
and
.Fn fetchPutHTTP
will use a direct connection even if a proxy server is defined.
.Pp
Since there seems to be no good way of implementing the HTTP PUT
method in a manner consistent with the rest of the
.Nm fetch
library,
.Fn fetchPutHTTP
is currently unimplemented.
.Sh AUTHENTICATION
Apart from setting the appropriate environment variables and
specifying the user name and password in the URL or the
.Vt struct url ,
the calling program has the option of defining an authentication
function with the following prototype:
.Pp
.Ft int
.Fn myAuthMethod "struct url *u"
.Pp
The callback function should fill in the
.Fa user
and
.Fa pwd
fields in the provided
.Vt struct url
and return 0 on success, or any other value to indicate failure.
.Pp
To register the authentication callback, simply set
.Va fetchAuthMethod
to point at it.
The callback will be used whenever a site requires authentication and
the appropriate environment variables are not set.
.Pp
This interface is experimental and may be subject to change.
.Sh RETURN VALUES
.Fn fetchParseURL
returns a pointer to a
.Vt struct url
containing the individual components of the URL.
If it is
unable to allocate memory, or the URL is syntactically incorrect,
.Fn fetchParseURL
returns a
.Dv NULL
pointer.
.Pp
The
.Fn fetchStat
functions return 0 on success and \-1 on failure.
.Pp
All other functions return a stream pointer which may be used to
access the requested document, or
.Dv NULL
if an error occurred.
.Pp
The following error codes are defined in
.In fetch.h :
.Bl -tag -width 18n
.It Bq Er FETCH_ABORT
Operation aborted
.It Bq Er FETCH_AUTH
Authentication failed
.It Bq Er FETCH_DOWN
Service unavailable
.It Bq Er FETCH_EXISTS
File exists
.It Bq Er FETCH_FULL
File system full
.It Bq Er FETCH_INFO
Informational response
.It Bq Er FETCH_MEMORY
Insufficient memory
.It Bq Er FETCH_MOVED
File has moved
.It Bq Er FETCH_NETWORK
Network error
.It Bq Er FETCH_OK
No error
.It Bq Er FETCH_PROTO
Protocol error
.It Bq Er FETCH_RESOLV
Resolver error
.It Bq Er FETCH_SERVER
Server error
.It Bq Er FETCH_TEMP
Temporary error
.It Bq Er FETCH_TIMEOUT
Operation timed out
.It Bq Er FETCH_UNAVAIL
File is not available
.It Bq Er FETCH_UNKNOWN
Unknown error
.It Bq Er FETCH_URL
Invalid URL
.El
.Pp
The accompanying error message includes a protocol-specific error code
and message, e.g.\& "File is not available (404 Not Found)"
.Sh ENVIRONMENT
.Bl -tag -width ".Ev FETCH_BIND_ADDRESS"
.It Ev FETCH_BIND_ADDRESS
Specifies a host name or IP address to which sockets used for outgoing
connections will be bound.
.It Ev FTP_LOGIN
Default FTP login if none was provided in the URL.
.It Ev FTP_PASSIVE_MODE
If set to anything but
.Ql no ,
forces the FTP code to use passive mode.
.It Ev FTP_PASSWORD
Default FTP password if the remote server requests one and none was
provided in the URL.
.It Ev FTP_PROXY
URL of the proxy to use for FTP requests.
The document part is ignored.
FTP and HTTP proxies are supported; if no scheme is specified, FTP is
assumed.
If the proxy is an FTP proxy,
.Nm libfetch
will send
.Ql user@host
as user name to the proxy, where
.Ql user
is the real user name, and
.Ql host
is the name of the FTP server.
.Pp
If this variable is set to an empty string, no proxy will be used for
FTP requests, even if the
.Ev HTTP_PROXY
variable is set.
.It Ev ftp_proxy
Same as
.Ev FTP_PROXY ,
for compatibility.
.It Ev HTTP_AUTH
Specifies HTTP authorization parameters as a colon-separated list of
items.
The first and second item are the authorization scheme and realm
respectively; further items are scheme-dependent.
Currently, only basic authorization is supported.
.Pp
Basic authorization requires two parameters: the user name and
password, in that order.
.Pp
This variable is only used if the server requires authorization and
no user name or password was specified in the URL.
.It Ev HTTP_PROXY
URL of the proxy to use for HTTP requests.
The document part is ignored.
Only HTTP proxies are supported for HTTP requests.
If no port number is specified, the default is 3128.
.Pp
Note that this proxy will also be used for FTP documents, unless the
.Ev FTP_PROXY
variable is set.
.It Ev http_proxy
Same as
.Ev HTTP_PROXY ,
for compatibility.
.It Ev HTTP_PROXY_AUTH
Specifies authorization parameters for the HTTP proxy in the same
format as the
.Ev HTTP_AUTH
variable.
.Pp
This variable is used if and only if connected to an HTTP proxy, and
is ignored if a user and/or a password were specified in the proxy
URL.
.It Ev HTTP_REFERER
Specifies the referrer URL to use for HTTP requests.
If set to
.Dq auto ,
the document URL will be used as referrer URL.
.It Ev HTTP_USER_AGENT
Specifies the User-Agent string to use for HTTP requests.
This can be useful when working with HTTP origin or proxy servers that
differentiate between user agents.
.It Ev NETRC
Specifies a file to use instead of
.Pa ~/.netrc
to look up login names and passwords for FTP sites.
See
.Xr ftp 1
for a description of the file format.
This feature is experimental.
.It Ev NO_PROXY
Either a single asterisk, which disables the use of proxies
altogether, or a comma- or whitespace-separated list of hosts for
which proxies should not be used.
.It Ev no_proxy
Same as
.Ev NO_PROXY ,
for compatibility.
.El
.Sh EXAMPLES
To access a proxy server on
.Pa proxy.example.com
port 8080, set the
.Ev HTTP_PROXY
environment variable in a manner similar to this:
.Pp
.Dl HTTP_PROXY=http://proxy.example.com:8080
.Pp
If the proxy server requires authentication, there are
two options available for passing the authentication data.
The first method is by using the proxy URL:
.Pp
.Dl HTTP_PROXY=http://\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]@proxy.example.com:8080
.Pp
The second method is by using the
.Ev HTTP_PROXY_AUTH
environment variable:
.Bd -literal -offset indent
HTTP_PROXY=http://proxy.example.com:8080
HTTP_PROXY_AUTH=basic:*:\*[Lt]user\*[Gt]:\*[Lt]pwd\*[Gt]
.Ed
.Pp
To disable the use of a proxy for an HTTP server running on the local
host, define
.Ev NO_PROXY
as follows:
.Bd -literal -offset indent
NO_PROXY=localhost,127.0.0.1
.Ed
.Sh SEE ALSO
.\" .Xr fetch 1 ,
.\" .Xr ftpio 3 ,
.Xr ftp 1 ,
.Xr ip 4
.Rs
.%A J. Postel
.%A J. K. Reynolds
.%D October 1985
.%B File Transfer Protocol
.%O RFC 959
.Re
.Rs
.%A P. Deutsch
.%A A. Emtage
.%A A. Marine
.%D May 1994
.%T How to Use Anonymous FTP
.%O RFC 1635
.Re
.Rs
.%A T. Berners-Lee
.%A L. Masinter
.%A M. McCahill
.%D December 1994
.%T Uniform Resource Locators (URL)
.%O RFC 1738
.Re
.Rs
.%A R. Fielding
.%A J. Gettys
.%A J. Mogul
.%A H. Frystyk
.%A L. Masinter
.%A P. Leach
.%A T. Berners-Lee
.%D January 1999
.%B Hypertext Transfer Protocol -- HTTP/1.1
.%O RFC 2616
.Re
.Rs
.%A J. Franks
.%A P. Hallam-Baker
.%A J. Hostetler
.%A S. Lawrence
.%A P. Leach
.%A A. Luotonen
.%A L. Stewart
.%D June 1999
.%B HTTP Authentication: Basic and Digest Access Authentication
.%O RFC 2617
.Re
.Sh HISTORY
The
.Nm fetch
library first appeared in
.Fx 3.0 .
.Sh AUTHORS
.An -nosplit
The
.Nm fetch
library was mostly written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org
with numerous suggestions from
.An Jordan K. Hubbard Aq Mt jkh@FreeBSD.org ,
.An Eugene Skepner Aq Mt eu@qub.com
and other
.Fx
developers.
It replaces the older
.Nm ftpio
library written by
.An Poul-Henning Kamp Aq Mt phk@FreeBSD.org
and
.An Jordan K. Hubbard Aq Mt jkh@FreeBSD.org .
.Pp
This manual page was written by
.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
.Sh BUGS
Some parts of the library are not yet implemented.
The most notable
examples of this are
.Fn fetchPutHTTP
and FTP proxy support.
.Pp
There is no way to select a proxy at run-time other than setting the
.Ev HTTP_PROXY
or
.Ev FTP_PROXY
environment variables as appropriate.
.Pp
.Nm libfetch
does not understand or obey 305 (Use Proxy) replies.
.Pp
Error numbers are unique only within a certain context; the error
codes used for FTP and HTTP overlap, as do those used for resolver and
system errors.
For instance, error code 202 means "Command not
implemented, superfluous at this site" in an FTP context and
"Accepted" in an HTTP context.
.Pp
.Fn fetchStatFTP
does not check that the result of an MDTM command is a valid date.
.Pp
The man page is incomplete, poorly written and produces badly
formatted text.
.Pp
The error reporting mechanism is unsatisfactory.
.Pp
Some parts of the code are not fully reentrant.

627
libfetch/fetch.c Normal file
View File

@ -0,0 +1,627 @@
/* $NetBSD: fetch.c,v 1.19 2009/08/11 20:48:06 joerg Exp $ */
/*-
* Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
* Copyright (c) 2008 Joerg Sonnenberger <joerg@NetBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD: fetch.c,v 1.41 2007/12/19 00:26:36 des Exp $
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef NETBSD
#include <nbcompat.h>
#endif
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fetch.h"
#include "common.h"
auth_t fetchAuthMethod;
int fetchLastErrCode;
char fetchLastErrString[MAXERRSTRING];
int fetchTimeout;
volatile int fetchRestartCalls = 1;
int fetchDebug;
/*** Local data **************************************************************/
/*
* Error messages for parser errors
*/
#define URL_MALFORMED 1
#define URL_BAD_SCHEME 2
#define URL_BAD_PORT 3
static struct fetcherr url_errlist[] = {
{ URL_MALFORMED, FETCH_URL, "Malformed URL" },
{ URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
{ URL_BAD_PORT, FETCH_URL, "Invalid server port" },
{ -1, FETCH_UNKNOWN, "Unknown parser error" }
};
/*** Public API **************************************************************/
/*
* Select the appropriate protocol for the URL scheme, and return a
* read-only stream connected to the document referenced by the URL.
* Also fill out the struct url_stat.
*/
fetchIO *
fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
{
if (us != NULL) {
us->size = -1;
us->atime = us->mtime = 0;
}
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchXGetFile(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchXGetFTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchXGetHTTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchXGetHTTP(URL, us, flags));
url_seterr(URL_BAD_SCHEME);
return (NULL);
}
/*
* Select the appropriate protocol for the URL scheme, and return a
* read-only stream connected to the document referenced by the URL.
*/
fetchIO *
fetchGet(struct url *URL, const char *flags)
{
return (fetchXGet(URL, NULL, flags));
}
/*
* Select the appropriate protocol for the URL scheme, and return a
* write-only stream connected to the document referenced by the URL.
*/
fetchIO *
fetchPut(struct url *URL, const char *flags)
{
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchPutFile(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchPutFTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchPutHTTP(URL, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchPutHTTP(URL, flags));
url_seterr(URL_BAD_SCHEME);
return (NULL);
}
/*
* Select the appropriate protocol for the URL scheme, and return the
* size of the document referenced by the URL if it exists.
*/
int
fetchStat(struct url *URL, struct url_stat *us, const char *flags)
{
if (us != NULL) {
us->size = -1;
us->atime = us->mtime = 0;
}
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchStatFile(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchStatFTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchStatHTTP(URL, us, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchStatHTTP(URL, us, flags));
url_seterr(URL_BAD_SCHEME);
return (-1);
}
/*
* Select the appropriate protocol for the URL scheme, and return a
* list of files in the directory pointed to by the URL.
*/
int
fetchList(struct url_list *ue, struct url *URL, const char *pattern,
const char *flags)
{
if (strcasecmp(URL->scheme, SCHEME_FILE) == 0)
return (fetchListFile(ue, URL, pattern, flags));
else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0)
return (fetchListFTP(ue, URL, pattern, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTP) == 0)
return (fetchListHTTP(ue, URL, pattern, flags));
else if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0)
return (fetchListHTTP(ue, URL, pattern, flags));
url_seterr(URL_BAD_SCHEME);
return -1;
}
/*
* Attempt to parse the given URL; if successful, call fetchXGet().
*/
fetchIO *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
struct url *u;
fetchIO *f;
if ((u = fetchParseURL(URL)) == NULL)
return (NULL);
f = fetchXGet(u, us, flags);
fetchFreeURL(u);
return (f);
}
/*
* Attempt to parse the given URL; if successful, call fetchGet().
*/
fetchIO *
fetchGetURL(const char *URL, const char *flags)
{
return (fetchXGetURL(URL, NULL, flags));
}
/*
* Attempt to parse the given URL; if successful, call fetchPut().
*/
fetchIO *
fetchPutURL(const char *URL, const char *flags)
{
struct url *u;
fetchIO *f;
if ((u = fetchParseURL(URL)) == NULL)
return (NULL);
f = fetchPut(u, flags);
fetchFreeURL(u);
return (f);
}
/*
* Attempt to parse the given URL; if successful, call fetchStat().
*/
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
struct url *u;
int s;
if ((u = fetchParseURL(URL)) == NULL)
return (-1);
s = fetchStat(u, us, flags);
fetchFreeURL(u);
return (s);
}
/*
* Attempt to parse the given URL; if successful, call fetchList().
*/
int
fetchListURL(struct url_list *ue, const char *URL, const char *pattern,
const char *flags)
{
struct url *u;
int rv;
if ((u = fetchParseURL(URL)) == NULL)
return -1;
rv = fetchList(ue, u, pattern, flags);
fetchFreeURL(u);
return rv;
}
/*
* Make a URL
*/
struct url *
fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
const char *user, const char *pwd)
{
struct url *u;
if (!scheme || (!host && !doc)) {
url_seterr(URL_MALFORMED);
return (NULL);
}
if (port < 0 || port > 65535) {
url_seterr(URL_BAD_PORT);
return (NULL);
}
/* allocate struct url */
if ((u = calloc(1, sizeof(*u))) == NULL) {
fetch_syserr();
return (NULL);
}
if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
fetch_syserr();
free(u);
return (NULL);
}
#define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
seturl(scheme);
seturl(host);
seturl(user);
seturl(pwd);
#undef seturl
u->port = port;
return (u);
}
int
fetch_urlpath_safe(char x)
{
if ((x >= '0' && x <= '9') || (x >= 'A' && x <= 'Z') ||
(x >= 'a' && x <= 'z'))
return 1;
switch (x) {
case '$':
case '-':
case '_':
case '.':
case '+':
case '!':
case '*':
case '\'':
case '(':
case ')':
case ',':
/* The following are allowed in segment and path components: */
case '?':
case ':':
case '@':
case '&':
case '=':
case '/':
case ';':
/* If something is already quoted... */
case '%':
return 1;
default:
return 0;
}
}
/*
* Copy an existing URL.
*/
struct url *
fetchCopyURL(const struct url *src)
{
struct url *dst;
char *doc;
/* allocate struct url */
if ((dst = malloc(sizeof(*dst))) == NULL) {
fetch_syserr();
return (NULL);
}
if ((doc = strdup(src->doc)) == NULL) {
fetch_syserr();
free(dst);
return (NULL);
}
*dst = *src;
dst->doc = doc;
return dst;
}
/*
* Split an URL into components. URL syntax is:
* [method:/][/[user[:pwd]@]host[:port]/][document]
* This almost, but not quite, RFC1738 URL syntax.
*/
struct url *
fetchParseURL(const char *URL)
{
const char *p, *q;
struct url *u;
size_t i, count;
int pre_quoted;
/* allocate struct url */
if ((u = calloc(1, sizeof(*u))) == NULL) {
fetch_syserr();
return (NULL);
}
if (*URL == '/') {
pre_quoted = 0;
strcpy(u->scheme, SCHEME_FILE);
p = URL;
goto quote_doc;
}
if (strncmp(URL, "file:", 5) == 0) {
pre_quoted = 1;
strcpy(u->scheme, SCHEME_FILE);
URL += 5;
if (URL[0] != '/' || URL[1] != '/' || URL[2] != '/') {
url_seterr(URL_MALFORMED);
goto ouch;
}
p = URL + 2;
goto quote_doc;
}
if (strncmp(URL, "http:", 5) == 0 ||
strncmp(URL, "https:", 6) == 0) {
pre_quoted = 1;
if (URL[4] == ':') {
strcpy(u->scheme, SCHEME_HTTP);
URL += 5;
} else {
strcpy(u->scheme, SCHEME_HTTPS);
URL += 6;
}
if (URL[0] != '/' || URL[1] != '/') {
url_seterr(URL_MALFORMED);
goto ouch;
}
URL += 2;
p = URL;
goto find_user;
}
if (strncmp(URL, "ftp:", 4) == 0) {
pre_quoted = 1;
strcpy(u->scheme, SCHEME_FTP);
URL += 4;
if (URL[0] != '/' || URL[1] != '/') {
url_seterr(URL_MALFORMED);
goto ouch;
}
URL += 2;
p = URL;
goto find_user;
}
url_seterr(URL_BAD_SCHEME);
goto ouch;
find_user:
p = strpbrk(URL, "/@");
if (p != NULL && *p == '@') {
/* username */
for (q = URL, i = 0; (*q != ':') && (*q != '@'); q++) {
if (i < URL_USERLEN)
u->user[i++] = *q;
}
/* password */
if (*q == ':') {
for (q++, i = 0; (*q != '@'); q++)
if (i < URL_PWDLEN)
u->pwd[i++] = *q;
}
p++;
} else {
p = URL;
}
/* hostname */
#ifdef INET6
if (*p == '[' && (q = strchr(p + 1, ']')) != NULL &&
(*++q == '\0' || *q == '/' || *q == ':')) {
if ((i = q - p - 2) > URL_HOSTLEN)
i = URL_HOSTLEN;
strncpy(u->host, ++p, i);
p = q;
} else
#endif
for (i = 0; *p && (*p != '/') && (*p != ':'); p++)
if (i < URL_HOSTLEN)
u->host[i++] = *p;
/* port */
if (*p == ':') {
for (q = ++p; *q && (*q != '/'); q++)
if (isdigit((unsigned char)*q))
u->port = u->port * 10 + (*q - '0');
else {
/* invalid port */
url_seterr(URL_BAD_PORT);
goto ouch;
}
p = q;
}
/* document */
if (!*p)
p = "/";
quote_doc:
count = 1;
for (i = 0; p[i] != '\0'; ++i) {
if ((!pre_quoted && p[i] == '%') ||
!fetch_urlpath_safe(p[i]))
count += 3;
else
++count;
}
if ((u->doc = malloc(count)) == NULL) {
fetch_syserr();
goto ouch;
}
for (i = 0; *p != '\0'; ++p) {
if ((!pre_quoted && *p == '%') ||
!fetch_urlpath_safe(*p)) {
u->doc[i++] = '%';
if ((unsigned char)*p < 160)
u->doc[i++] = '0' + ((unsigned char)*p) / 16;
else
u->doc[i++] = 'a' - 10 + ((unsigned char)*p) / 16;
if ((unsigned char)*p % 16 < 10)
u->doc[i++] = '0' + ((unsigned char)*p) % 16;
else
u->doc[i++] = 'a' - 10 + ((unsigned char)*p) % 16;
} else
u->doc[i++] = *p;
}
u->doc[i] = '\0';
return (u);
ouch:
free(u);
return (NULL);
}
/*
* Free a URL
*/
void
fetchFreeURL(struct url *u)
{
free(u->doc);
free(u);
}
static char
xdigit2digit(char digit)
{
digit = tolower((unsigned char)digit);
if (digit >= 'a' && digit <= 'f')
digit = digit - 'a' + 10;
else
digit = digit - '0';
return digit;
}
/*
* Unquote whole URL.
* Skips optional parts like query or fragment identifier.
*/
char *
fetchUnquotePath(struct url *url)
{
char *unquoted;
const char *iter;
size_t i;
if ((unquoted = malloc(strlen(url->doc) + 1)) == NULL)
return NULL;
for (i = 0, iter = url->doc; *iter != '\0'; ++iter) {
if (*iter == '#' || *iter == '?')
break;
if (iter[0] != '%' ||
!isxdigit((unsigned char)iter[1]) ||
!isxdigit((unsigned char)iter[2])) {
unquoted[i++] = *iter;
continue;
}
unquoted[i++] = xdigit2digit(iter[1]) * 16 +
xdigit2digit(iter[2]);
iter += 2;
}
unquoted[i] = '\0';
return unquoted;
}
/*
* Extract the file name component of a URL.
*/
char *
fetchUnquoteFilename(struct url *url)
{
char *unquoted, *filename;
const char *last_slash;
if ((unquoted = fetchUnquotePath(url)) == NULL)
return NULL;
if ((last_slash = strrchr(unquoted, '/')) == NULL)
return unquoted;
filename = strdup(last_slash + 1);
free(unquoted);
return filename;
}
char *
fetchStringifyURL(const struct url *url)
{
size_t total;
char *doc;
/* scheme :// user : pwd @ host :port doc */
total = strlen(url->scheme) + 3 + strlen(url->user) + 1 +
strlen(url->pwd) + 1 + strlen(url->host) + 6 + strlen(url->doc) + 1;
if ((doc = malloc(total)) == NULL)
return NULL;
if (url->port != 0)
snprintf(doc, total, "%s%s%s%s%s%s%s:%d%s",
url->scheme,
url->scheme[0] != '\0' ? "://" : "",
url->user,
url->pwd[0] != '\0' ? ":" : "",
url->pwd,
url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
url->host,
(int)url->port,
url->doc);
else {
snprintf(doc, total, "%s%s%s%s%s%s%s%s",
url->scheme,
url->scheme[0] != '\0' ? "://" : "",
url->user,
url->pwd[0] != '\0' ? ":" : "",
url->pwd,
url->user[0] != '\0' || url->pwd[0] != '\0' ? "@" : "",
url->host,
url->doc);
}
return doc;
}

526
libfetch/fetch.cat3 Normal file
View File

@ -0,0 +1,526 @@
FETCH(3) NetBSD Library Functions Manual FETCH(3)
NNAAMMEE
ffeettcchhMMaakkeeUURRLL, ffeettcchhPPaarrsseeUURRLL, ffeettcchhCCooppyyUURRLL, ffeettcchhFFrreeeeUURRLL, ffeettcchhXXGGeettUURRLL,
ffeettcchhGGeettUURRLL, ffeettcchhPPuuttUURRLL, ffeettcchhSSttaattUURRLL, ffeettcchhLLiissttUURRLL, ffeettcchhXXGGeett,
ffeettcchhGGeett, ffeettcchhPPuutt, ffeettcchhSSttaatt, ffeettcchhLLiisstt, ffeettcchhXXGGeettFFiillee, ffeettcchhGGeettFFiillee,
ffeettcchhPPuuttFFiillee, ffeettcchhSSttaattFFiillee, ffeettcchhLLiissttFFiillee, ffeettcchhXXGGeettHHTTTTPP, ffeettcchhGGeettHHTTTTPP,
ffeettcchhPPuuttHHTTTTPP, ffeettcchhSSttaattHHTTTTPP, ffeettcchhLLiissttHHTTTTPP, ffeettcchhXXGGeettFFTTPP, ffeettcchhGGeettFFTTPP,
ffeettcchhPPuuttFFTTPP, ffeettcchhSSttaattFFTTPP, ffeettcchhLLiissttFFTTPP ffeettcchhIInniittUURRLLLLiisstt,
ffeettcchhFFrreeeeUURRLLLLiisstt, ffeettcchhUUnnqquuootteePPaatthh, ffeettcchhUUnnqquuootteeFFiilleennaammee,
ffeettcchhSSttrriinnggiiffyyUURRLL, ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt, ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee,
ffeettcchh -- file transfer functions
LLIIBBRRAARRYY
File Transfer Library for URLs (libfetch, -lfetch)
SSYYNNOOPPSSIISS
##iinncclluuddee <<ssttddiioo..hh>>
##iinncclluuddee <<ffeettcchh..hh>>
_s_t_r_u_c_t _u_r_l _*
ffeettcchhMMaakkeeUURRLL(_c_o_n_s_t _c_h_a_r _*_s_c_h_e_m_e, _c_o_n_s_t _c_h_a_r _*_h_o_s_t, _i_n_t _p_o_r_t,
_c_o_n_s_t _c_h_a_r _*_d_o_c, _c_o_n_s_t _c_h_a_r _*_u_s_e_r, _c_o_n_s_t _c_h_a_r _*_p_w_d);
_s_t_r_u_c_t _u_r_l _*
ffeettcchhPPaarrsseeUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L);
_s_t_r_u_c_t _u_r_l _*
ffeettcchhCCooppyyUURRLL(_c_o_n_s_t _s_t_r_u_c_t _u_r_l _*_u);
_v_o_i_d
ffeettcchhFFrreeeeUURRLL(_s_t_r_u_c_t _u_r_l _*_u);
_f_e_t_c_h_I_O _*
ffeettcchhXXGGeettUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhGGeettUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhPPuuttUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhSSttaattUURRLL(_c_o_n_s_t _c_h_a_r _*_U_R_L, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhLLiissttUURRLL(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _c_o_n_s_t _c_h_a_r _*_U_R_L, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhXXGGeett(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhGGeett(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhPPuutt(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhSSttaatt(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhXXGGeettFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhGGeettFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhPPuuttFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhSSttaattFFiillee(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhLLiissttFFiillee(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhXXGGeettHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhGGeettHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhPPuuttHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhSSttaattHHTTTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhLLiissttHHTTTTPP(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhXXGGeettFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhGGeettFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_f_e_t_c_h_I_O _*
ffeettcchhPPuuttFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhSSttaattFFTTPP(_s_t_r_u_c_t _u_r_l _*_u, _s_t_r_u_c_t _u_r_l___s_t_a_t _*_u_s, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_i_n_t
ffeettcchhLLiissttFFTTPP(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_l_i_s_t, _s_t_r_u_c_t _u_r_l _*_u, _c_o_n_s_t _c_h_a_r _*_f_l_a_g_s);
_v_o_i_d
ffeettcchhIInniittUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_u_l);
_i_n_t
ffeettcchhAAppppeennddUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_d_s_t, _c_o_n_s_t _s_t_r_u_c_t _u_r_l___l_i_s_t _*_s_r_c);
_v_o_i_d
ffeettcchhFFrreeeeUURRLLLLiisstt(_s_t_r_u_c_t _u_r_l___l_i_s_t _*_u_l);
_c_h_a_r _*
ffeettcchhUUnnqquuootteePPaatthh(_s_t_r_u_c_t _u_r_l _*_u);
_c_h_a_r _*
ffeettcchhUUnnqquuootteeFFiilleennaammee(_s_t_r_u_c_t _u_r_l _*_u);
_c_h_a_r _*
ffeettcchhSSttrriinnggiiffyyUURRLL(_c_o_n_s_t _s_t_r_u_c_t _u_r_l _*_u);
_v_o_i_d
ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt(_i_n_t _g_l_o_b_a_l, _i_n_t _p_e_r___h_o_s_t);
_v_o_i_d
ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee(_v_o_i_d);
DDEESSCCRRIIPPTTIIOONN
These functions implement a high-level library for retrieving and upload-
ing files using Uniform Resource Locators (URLs).
ffeettcchhPPaarrsseeUURRLL() takes a URL in the form of a null-terminated string and
splits it into its components function according to the Common Internet
Scheme Syntax detailed in RFC 1738. A regular expression which produces
this syntax is:
<scheme>:(//(<user>(:<pwd>)?@)?<host>(:<port>)?)?/(<document>)?
If the URL does not seem to begin with a scheme name, it is assumed to be
a local path. Only absolute path names are accepted.
Note that some components of the URL are not necessarily relevant to all
URL schemes. For instance, the file scheme only needs the <scheme> and
<document> components. ffeettcchhPPaarrsseeUURRLL() quotes any unsafe character in
the URL automatically. This is not done by ffeettcchhMMaakkeeUURRLL().
ffeettcchhCCooppyyUURRLL() copies an existing _u_r_l structure.
ffeettcchhMMaakkeeUURRLL(), ffeettcchhPPaarrsseeUURRLL(), and ffeettcchhCCooppyyUURRLL() return a pointer to a
_u_r_l structure, which is defined as follows in <_f_e_t_c_h_._h>:
#define URL_SCHEMELEN 16
#define URL_USERLEN 256
#define URL_PWDLEN 256
#define URL_HOSTLEN 255
struct url {
char scheme[URL_SCHEMELEN + 1];
char user[URL_USERLEN + 1];
char pwd[URL_PWDLEN + 1];
char host[URL_HOSTLEN + 1];
int port;
char *doc;
off_t offset;
size_t length;
time_t last_modified;
};
The pointer returned by ffeettcchhMMaakkeeUURRLL(), ffeettcchhCCooppyyUURRLL(), and
ffeettcchhPPaarrsseeUURRLL() should be freed using ffeettcchhFFrreeeeUURRLL(). The size of _s_t_r_u_c_t
_U_R_L is not part of the ABI.
ffeettcchhXXGGeettUURRLL(), ffeettcchhGGeettUURRLL(), and ffeettcchhPPuuttUURRLL() constitute the recom-
mended interface to the ffeettcchh library. They examine the URL passed to
them to determine the transfer method, and call the appropriate lower-
level functions to perform the actual transfer. ffeettcchhXXGGeettUURRLL() also
returns the remote document's metadata in the _u_r_l___s_t_a_t structure pointed
to by the _u_s argument.
The _f_l_a_g_s argument is a string of characters which specify transfer
options. The meaning of the individual flags is scheme-dependent, and is
detailed in the appropriate section below.
ffeettcchhSSttaattUURRLL() attempts to obtain the requested document's metadata and
fill in the structure pointed to by its second argument. The _u_r_l___s_t_a_t
structure is defined as follows in <_f_e_t_c_h_._h>:
struct url_stat {
off_t size;
time_t atime;
time_t mtime;
};
If the size could not be obtained from the server, the _s_i_z_e field is set
to -1. If the modification time could not be obtained from the server,
the _m_t_i_m_e field is set to the epoch. If the access time could not be
obtained from the server, the _a_t_i_m_e field is set to the modification
time.
ffeettcchhLLiissttUURRLL() attempts to list the contents of the directory pointed to
by the URL provided. The pattern can be a simple glob-like expression as
hint. Callers should not depend on the server to filter names. If suc-
cessful, it appends the list of entries to the _u_r_l___l_i_s_t structure. The
_u_r_l___l_i_s_t structure is defined as follows in <_f_e_t_c_h_._h>:
struct url_list {
size_t length;
size_t alloc_size;
struct url *urls;
};
The list should be initialized by calling ffeettcchhIInniittUURRLLLLiisstt() and the
entries be freed by calling ffeettcchhFFrreeeeUURRLLLLiisstt(). The function
ffeettcchhAAppppeennddUURRLLLLiisstt() can be used to append one URL lists to another. If
the `c' (cache result) flag is specified, the library is allowed to
internally cache the result.
ffeettcchhSSttrriinnggiiffyyUURRLL() returns the URL as string. ffeettcchhUUnnqquuootteePPaatthh()
returns the path name part of the URL with any quoting undone. Query
arguments and fragment identifiers are not included.
ffeettcchhUUnnqquuootteeFFiilleennaammee() returns the last component of the path name as
returned by ffeettcchhUUnnqquuootteePPaatthh(). ffeettcchhSSttrriinnggiiffyyUURRLL(), ffeettcchhUUnnqquuootteePPaatthh(),
and ffeettcchhUUnnqquuootteeFFiilleennaammee() return a string that should be deallocated
with ffrreeee() after use.
ffeettcchhCCoonnnneeccttiioonnCCaacchheeIInniitt() enables the connection cache. The first argu-
ment specifies the global limit on cached connections. The second argu-
ment specifies the host limit. Entries are considered to specify the
same host, if the host name from the URL is identical, indepent of the
address or address family. ffeettcchhCCoonnnneeccttiioonnCCaacchheeCClloossee() flushed the con-
nection cache and closes all cached connections.
ffeettcchhXXGGeett(), ffeettcchhGGeett(), ffeettcchhPPuutt(), and ffeettcchhSSttaatt() are similar to
ffeettcchhXXGGeettUURRLL(), ffeettcchhGGeettUURRLL(), ffeettcchhPPuuttUURRLL(), and ffeettcchhSSttaattUURRLL(), except
that they expect a pre-parsed URL in the form of a pointer to a _s_t_r_u_c_t
_u_r_l rather than a string.
All of the ffeettcchhXXGGeettXXXXXX(), ffeettcchhGGeettXXXXXX(), and ffeettcchhPPuuttXXXXXX() functions
return a pointer to a stream which can be used to read or write data from
or to the requested document, respectively. Note that although the
implementation details of the individual access methods vary, it can gen-
erally be assumed that a stream returned by one of the ffeettcchhXXGGeettXXXXXX() or
ffeettcchhGGeettXXXXXX() functions is read-only, and that a stream returned by one
of the ffeettcchhPPuuttXXXXXX() functions is write-only.
PPRROOTTOOCCOOLL IINNDDEEPPEENNDDEENNTT FFLLAAGGSS
If the `i' (if-modified-since) flag is specified, the library will try to
fetch the content only if it is newer than _l_a_s_t___m_o_d_i_f_i_e_d. For HTTP an
If-Modified-Since HTTP header is sent. For FTP a MTDM command is sent
first and compared locally. For FILE the source file is compared.
FFIILLEE SSCCHHEEMMEE
ffeettcchhXXGGeettFFiillee(), ffeettcchhGGeettFFiillee(), and ffeettcchhPPuuttFFiillee() provide access to
documents which are files in a locally mounted file system. Only the
<document> component of the URL is used.
ffeettcchhXXGGeettFFiillee() and ffeettcchhGGeettFFiillee() do not accept any flags.
ffeettcchhPPuuttFFiillee() accepts the `a' (append to file) flag. If that flag is
specified, the data written to the stream returned by ffeettcchhPPuuttFFiillee() will
be appended to the previous contents of the file, instead of replacing
them.
FFTTPP SSCCHHEEMMEE
ffeettcchhXXGGeettFFTTPP(), ffeettcchhGGeettFFTTPP(), and ffeettcchhPPuuttFFTTPP() implement the FTP proto-
col as described in RFC 959.
By default lliibbffeettcchh will attempt to use passive mode first and only fall-
back to active mode if the server reports a syntax error. If the `a'
(active) flag is specified, a passive connection is not tried and active
mode is used directly.
If the `l' (low) flag is specified, data sockets will be allocated in the
low (or default) port range instead of the high port range (see ip(4)).
If the `d' (direct) flag is specified, ffeettcchhXXGGeettFFTTPP(), ffeettcchhGGeettFFTTPP(), and
ffeettcchhPPuuttFFTTPP() will use a direct connection even if a proxy server is
defined.
If no user name or password is given, the ffeettcchh library will attempt an
anonymous login, with user name "anonymous" and password "anony-
mous@<hostname>".
HHTTTTPP SSCCHHEEMMEE
The ffeettcchhXXGGeettHHTTTTPP(), ffeettcchhGGeettHHTTTTPP(), and ffeettcchhPPuuttHHTTTTPP() functions imple-
ment the HTTP/1.1 protocol. With a little luck, there is even a chance
that they comply with RFC 2616 and RFC 2617.
If the `d' (direct) flag is specified, ffeettcchhXXGGeettHHTTTTPP(), ffeettcchhGGeettHHTTTTPP(),
and ffeettcchhPPuuttHHTTTTPP() will use a direct connection even if a proxy server is
defined.
Since there seems to be no good way of implementing the HTTP PUT method
in a manner consistent with the rest of the ffeettcchh library, ffeettcchhPPuuttHHTTTTPP()
is currently unimplemented.
AAUUTTHHEENNTTIICCAATTIIOONN
Apart from setting the appropriate environment variables and specifying
the user name and password in the URL or the _s_t_r_u_c_t _u_r_l, the calling pro-
gram has the option of defining an authentication function with the fol-
lowing prototype:
_i_n_t mmyyAAuutthhMMeetthhoodd(_s_t_r_u_c_t _u_r_l _*_u)
The callback function should fill in the _u_s_e_r and _p_w_d fields in the pro-
vided _s_t_r_u_c_t _u_r_l and return 0 on success, or any other value to indicate
failure.
To register the authentication callback, simply set _f_e_t_c_h_A_u_t_h_M_e_t_h_o_d to
point at it. The callback will be used whenever a site requires authen-
tication and the appropriate environment variables are not set.
This interface is experimental and may be subject to change.
RREETTUURRNN VVAALLUUEESS
ffeettcchhPPaarrsseeUURRLL() returns a pointer to a _s_t_r_u_c_t _u_r_l containing the individ-
ual components of the URL. If it is unable to allocate memory, or the
URL is syntactically incorrect, ffeettcchhPPaarrsseeUURRLL() returns a NULL pointer.
The ffeettcchhSSttaatt() functions return 0 on success and -1 on failure.
All other functions return a stream pointer which may be used to access
the requested document, or NULL if an error occurred.
The following error codes are defined in <_f_e_t_c_h_._h>:
[FETCH_ABORT] Operation aborted
[FETCH_AUTH] Authentication failed
[FETCH_DOWN] Service unavailable
[FETCH_EXISTS] File exists
[FETCH_FULL] File system full
[FETCH_INFO] Informational response
[FETCH_MEMORY] Insufficient memory
[FETCH_MOVED] File has moved
[FETCH_NETWORK] Network error
[FETCH_OK] No error
[FETCH_PROTO] Protocol error
[FETCH_RESOLV] Resolver error
[FETCH_SERVER] Server error
[FETCH_TEMP] Temporary error
[FETCH_TIMEOUT] Operation timed out
[FETCH_UNAVAIL] File is not available
[FETCH_UNKNOWN] Unknown error
[FETCH_URL] Invalid URL
The accompanying error message includes a protocol-specific error code
and message, e.g. "File is not available (404 Not Found)"
EENNVVIIRROONNMMEENNTT
FETCH_BIND_ADDRESS Specifies a host name or IP address to which sockets
used for outgoing connections will be bound.
FTP_LOGIN Default FTP login if none was provided in the URL.
FTP_PASSIVE_MODE If set to anything but `no', forces the FTP code to
use passive mode.
FTP_PASSWORD Default FTP password if the remote server requests
one and none was provided in the URL.
FTP_PROXY URL of the proxy to use for FTP requests. The docu-
ment part is ignored. FTP and HTTP proxies are sup-
ported; if no scheme is specified, FTP is assumed.
If the proxy is an FTP proxy, lliibbffeettcchh will send
`user@host' as user name to the proxy, where `user'
is the real user name, and `host' is the name of the
FTP server.
If this variable is set to an empty string, no proxy
will be used for FTP requests, even if the HTTP_PROXY
variable is set.
ftp_proxy Same as FTP_PROXY, for compatibility.
HTTP_AUTH Specifies HTTP authorization parameters as a colon-
separated list of items. The first and second item
are the authorization scheme and realm respectively;
further items are scheme-dependent. Currently, only
basic authorization is supported.
Basic authorization requires two parameters: the user
name and password, in that order.
This variable is only used if the server requires
authorization and no user name or password was speci-
fied in the URL.
HTTP_PROXY URL of the proxy to use for HTTP requests. The docu-
ment part is ignored. Only HTTP proxies are sup-
ported for HTTP requests. If no port number is spec-
ified, the default is 3128.
Note that this proxy will also be used for FTP docu-
ments, unless the FTP_PROXY variable is set.
http_proxy Same as HTTP_PROXY, for compatibility.
HTTP_PROXY_AUTH Specifies authorization parameters for the HTTP proxy
in the same format as the HTTP_AUTH variable.
This variable is used if and only if connected to an
HTTP proxy, and is ignored if a user and/or a pass-
word were specified in the proxy URL.
HTTP_REFERER Specifies the referrer URL to use for HTTP requests.
If set to ``auto'', the document URL will be used as
referrer URL.
HTTP_USER_AGENT Specifies the User-Agent string to use for HTTP
requests. This can be useful when working with HTTP
origin or proxy servers that differentiate between
user agents.
NETRC Specifies a file to use instead of _~_/_._n_e_t_r_c to look
up login names and passwords for FTP sites. See
ftp(1) for a description of the file format. This
feature is experimental.
NO_PROXY Either a single asterisk, which disables the use of
proxies altogether, or a comma- or whitespace-sepa-
rated list of hosts for which proxies should not be
used.
no_proxy Same as NO_PROXY, for compatibility.
EEXXAAMMPPLLEESS
To access a proxy server on _p_r_o_x_y_._e_x_a_m_p_l_e_._c_o_m port 8080, set the
HTTP_PROXY environment variable in a manner similar to this:
HTTP_PROXY=http://proxy.example.com:8080
If the proxy server requires authentication, there are two options avail-
able for passing the authentication data. The first method is by using
the proxy URL:
HTTP_PROXY=http://<user>:<pwd>@proxy.example.com:8080
The second method is by using the HTTP_PROXY_AUTH environment variable:
HTTP_PROXY=http://proxy.example.com:8080
HTTP_PROXY_AUTH=basic:*:<user>:<pwd>
To disable the use of a proxy for an HTTP server running on the local
host, define NO_PROXY as follows:
NO_PROXY=localhost,127.0.0.1
SSEEEE AALLSSOO
ftp(1), ip(4)
J. Postel and J. K. Reynolds, _F_i_l_e _T_r_a_n_s_f_e_r _P_r_o_t_o_c_o_l, October 1985, RFC
959.
P. Deutsch, A. Emtage, and A. Marine, _H_o_w _t_o _U_s_e _A_n_o_n_y_m_o_u_s _F_T_P, May 1994,
RFC 1635.
T. Berners-Lee, L. Masinter, and M. McCahill, _U_n_i_f_o_r_m _R_e_s_o_u_r_c_e _L_o_c_a_t_o_r_s
_(_U_R_L_), December 1994, RFC 1738.
R. Fielding, J. Gettys, J. Mogul, H. Frystyk, L. Masinter, P. Leach, and
T. Berners-Lee, _H_y_p_e_r_t_e_x_t _T_r_a_n_s_f_e_r _P_r_o_t_o_c_o_l _-_- _H_T_T_P_/_1_._1, January 1999,
RFC 2616.
J. Franks, P. Hallam-Baker, J. Hostetler, S. Lawrence, P. Leach, A.
Luotonen, and L. Stewart, _H_T_T_P _A_u_t_h_e_n_t_i_c_a_t_i_o_n_: _B_a_s_i_c _a_n_d _D_i_g_e_s_t _A_c_c_e_s_s
_A_u_t_h_e_n_t_i_c_a_t_i_o_n, June 1999, RFC 2617.
HHIISSTTOORRYY
The ffeettcchh library first appeared in FreeBSD 3.0.
AAUUTTHHOORRSS
The ffeettcchh library was mostly written by Dag-Erling Smørgrav
<des@FreeBSD.org> with numerous suggestions from Jordan K. Hubbard
<jkh@FreeBSD.org>, Eugene Skepner <eu@qub.com> and other FreeBSD develop-
ers. It replaces the older ffttppiioo library written by Poul-Henning Kamp
<phk@FreeBSD.org> and Jordan K. Hubbard <jkh@FreeBSD.org>.
This manual page was written by Dag-Erling Smørgrav <des@FreeBSD.org>.
BBUUGGSS
Some parts of the library are not yet implemented. The most notable
examples of this are ffeettcchhPPuuttHHTTTTPP() and FTP proxy support.
There is no way to select a proxy at run-time other than setting the
HTTP_PROXY or FTP_PROXY environment variables as appropriate.
lliibbffeettcchh does not understand or obey 305 (Use Proxy) replies.
Error numbers are unique only within a certain context; the error codes
used for FTP and HTTP overlap, as do those used for resolver and system
errors. For instance, error code 202 means "Command not implemented,
superfluous at this site" in an FTP context and "Accepted" in an HTTP
context.
ffeettcchhSSttaattFFTTPP() does not check that the result of an MDTM command is a
valid date.
The man page is incomplete, poorly written and produces badly formatted
text.
The error reporting mechanism is unsatisfactory.
Some parts of the code are not fully reentrant.
NetBSD 5.0 January 22, 2010 NetBSD 5.0

187
libfetch/fetch.h Normal file
View File

@ -0,0 +1,187 @@
/* $NetBSD: fetch.h,v 1.16 2010/01/22 13:21:09 joerg Exp $ */
/*-
* Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD: fetch.h,v 1.26 2004/09/21 18:35:20 des Exp $
*/
#ifndef _FETCH_H_INCLUDED
#define _FETCH_H_INCLUDED
#include <sys/types.h>
#include <limits.h>
#include <stdio.h>
#define _LIBFETCH_VER "libfetch/2.0"
#define URL_HOSTLEN 255
#define URL_SCHEMELEN 16
#define URL_USERLEN 256
#define URL_PWDLEN 256
typedef struct fetchIO fetchIO;
struct url {
char scheme[URL_SCHEMELEN + 1];
char user[URL_USERLEN + 1];
char pwd[URL_PWDLEN + 1];
char host[URL_HOSTLEN + 1];
int port;
char *doc;
off_t offset;
size_t length;
time_t last_modified;
};
struct url_stat {
off_t size;
time_t atime;
time_t mtime;
};
struct url_list {
size_t length;
size_t alloc_size;
struct url *urls;
};
/* Recognized schemes */
#define SCHEME_FTP "ftp"
#define SCHEME_HTTP "http"
#define SCHEME_HTTPS "https"
#define SCHEME_FILE "file"
/* Error codes */
#define FETCH_ABORT 1
#define FETCH_AUTH 2
#define FETCH_DOWN 3
#define FETCH_EXISTS 4
#define FETCH_FULL 5
#define FETCH_INFO 6
#define FETCH_MEMORY 7
#define FETCH_MOVED 8
#define FETCH_NETWORK 9
#define FETCH_OK 10
#define FETCH_PROTO 11
#define FETCH_RESOLV 12
#define FETCH_SERVER 13
#define FETCH_TEMP 14
#define FETCH_TIMEOUT 15
#define FETCH_UNAVAIL 16
#define FETCH_UNKNOWN 17
#define FETCH_URL 18
#define FETCH_VERBOSE 19
#define FETCH_UNCHANGED 20
#if defined(__cplusplus)
extern "C" {
#endif
void fetchIO_close(fetchIO *);
ssize_t fetchIO_read(fetchIO *, void *, size_t);
ssize_t fetchIO_write(fetchIO *, const void *, size_t);
/* fetchIO-specific functions */
fetchIO *fetchXGetFile(struct url *, struct url_stat *, const char *);
fetchIO *fetchGetFile(struct url *, const char *);
fetchIO *fetchPutFile(struct url *, const char *);
int fetchStatFile(struct url *, struct url_stat *, const char *);
int fetchListFile(struct url_list *, struct url *, const char *,
const char *);
/* HTTP-specific functions */
fetchIO *fetchXGetHTTP(struct url *, struct url_stat *, const char *);
fetchIO *fetchGetHTTP(struct url *, const char *);
fetchIO *fetchPutHTTP(struct url *, const char *);
int fetchStatHTTP(struct url *, struct url_stat *, const char *);
int fetchListHTTP(struct url_list *, struct url *, const char *,
const char *);
/* FTP-specific functions */
fetchIO *fetchXGetFTP(struct url *, struct url_stat *, const char *);
fetchIO *fetchGetFTP(struct url *, const char *);
fetchIO *fetchPutFTP(struct url *, const char *);
int fetchStatFTP(struct url *, struct url_stat *, const char *);
int fetchListFTP(struct url_list *, struct url *, const char *,
const char *);
/* Generic functions */
fetchIO *fetchXGetURL(const char *, struct url_stat *, const char *);
fetchIO *fetchGetURL(const char *, const char *);
fetchIO *fetchPutURL(const char *, const char *);
int fetchStatURL(const char *, struct url_stat *, const char *);
int fetchListURL(struct url_list *, const char *, const char *,
const char *);
fetchIO *fetchXGet(struct url *, struct url_stat *, const char *);
fetchIO *fetchGet(struct url *, const char *);
fetchIO *fetchPut(struct url *, const char *);
int fetchStat(struct url *, struct url_stat *, const char *);
int fetchList(struct url_list *, struct url *, const char *,
const char *);
/* URL parsing */
struct url *fetchMakeURL(const char *, const char *, int,
const char *, const char *, const char *);
struct url *fetchParseURL(const char *);
struct url *fetchCopyURL(const struct url *);
char *fetchStringifyURL(const struct url *);
void fetchFreeURL(struct url *);
/* URL listening */
void fetchInitURLList(struct url_list *);
int fetchAppendURLList(struct url_list *, const struct url_list *);
void fetchFreeURLList(struct url_list *);
char *fetchUnquotePath(struct url *);
char *fetchUnquoteFilename(struct url *);
/* Connection caching */
void fetchConnectionCacheInit(int, int);
void fetchConnectionCacheClose(void);
/* Authentication */
typedef int (*auth_t)(struct url *);
extern auth_t fetchAuthMethod;
/* Last error code */
extern int fetchLastErrCode;
#define MAXERRSTRING 256
extern char fetchLastErrString[MAXERRSTRING];
/* I/O timeout */
extern int fetchTimeout;
/* Restart interrupted syscalls */
extern volatile int fetchRestartCalls;
/* Extra verbosity */
extern int fetchDebug;
#if defined(__cplusplus)
}
#endif
#endif

265
libfetch/file.c Normal file
View File

@ -0,0 +1,265 @@
/* $NetBSD: file.c,v 1.15 2009/10/15 12:36:57 joerg Exp $ */
/*-
* Copyright (c) 1998-2004 Dag-Erling Coïdan Smørgrav
* Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD: file.c,v 1.18 2007/12/14 10:26:58 des Exp $
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif
#ifndef NETBSD
#include <nbcompat.h>
#endif
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "fetch.h"
#include "common.h"
static int fetch_stat_file(int, struct url_stat *);
static ssize_t
fetchFile_read(void *cookie, void *buf, size_t len)
{
return read(*(int *)cookie, buf, len);
}
static ssize_t
fetchFile_write(void *cookie, const void *buf, size_t len)
{
return write(*(int *)cookie, buf, len);
}
static void
fetchFile_close(void *cookie)
{
int fd = *(int *)cookie;
free(cookie);
close(fd);
}
fetchIO *
fetchXGetFile(struct url *u, struct url_stat *us, const char *flags)
{
char *path;
fetchIO *f;
struct url_stat local_us;
int if_modified_since, fd, *cookie;
if_modified_since = CHECK_FLAG('i');
if (if_modified_since && us == NULL)
us = &local_us;
if ((path = fetchUnquotePath(u)) == NULL) {
fetch_syserr();
return NULL;
}
fd = open(path, O_RDONLY);
free(path);
if (fd == -1) {
fetch_syserr();
return NULL;
}
if (us && fetch_stat_file(fd, us) == -1) {
close(fd);
fetch_syserr();
return NULL;
}
if (if_modified_since && u->last_modified > 0 &&
u->last_modified >= us->mtime) {
close(fd);
fetchLastErrCode = FETCH_UNCHANGED;
snprintf(fetchLastErrString, MAXERRSTRING, "Unchanged");
return NULL;
}
if (u->offset && lseek(fd, u->offset, SEEK_SET) == -1) {
close(fd);
fetch_syserr();
return NULL;
}
cookie = malloc(sizeof(int));
if (cookie == NULL) {
close(fd);
fetch_syserr();
return NULL;
}
*cookie = fd;
f = fetchIO_unopen(cookie, fetchFile_read, fetchFile_write, fetchFile_close);
if (f == NULL) {
close(fd);
free(cookie);
}
return f;
}
fetchIO *
fetchGetFile(struct url *u, const char *flags)
{
return (fetchXGetFile(u, NULL, flags));
}
fetchIO *
fetchPutFile(struct url *u, const char *flags)
{
char *path;
fetchIO *f;
int fd, *cookie;
if ((path = fetchUnquotePath(u)) == NULL) {
fetch_syserr();
return NULL;
}
if (CHECK_FLAG('a'))
fd = open(path, O_WRONLY | O_APPEND);
else
fd = open(path, O_WRONLY);
free(path);
if (fd == -1) {
fetch_syserr();
return NULL;
}
if (u->offset && lseek(fd, u->offset, SEEK_SET) == -1) {
close(fd);
fetch_syserr();
return NULL;
}
cookie = malloc(sizeof(int));
if (cookie == NULL) {
close(fd);
fetch_syserr();
return NULL;
}
*cookie = fd;
f = fetchIO_unopen(cookie, fetchFile_read, fetchFile_write, fetchFile_close);
if (f == NULL) {
close(fd);
free(cookie);
}
return f;
}
static int
fetch_stat_file(int fd, struct url_stat *us)
{
struct stat sb;
us->size = -1;
us->atime = us->mtime = 0;
if (fstat(fd, &sb) == -1) {
fetch_syserr();
return (-1);
}
us->size = sb.st_size;
us->atime = sb.st_atime;
us->mtime = sb.st_mtime;
return (0);
}
int
fetchStatFile(struct url *u, struct url_stat *us, const char *flags)
{
char *path;
int fd, rv;
if ((path = fetchUnquotePath(u)) == NULL) {
fetch_syserr();
return -1;
}
fd = open(path, O_RDONLY);
free(path);
if (fd == -1) {
fetch_syserr();
return -1;
}
rv = fetch_stat_file(fd, us);
close(fd);
return rv;
}
int
fetchListFile(struct url_list *ue, struct url *u, const char *pattern, const char *flags)
{
char *path;
struct dirent *de;
DIR *dir;
int ret;
if ((path = fetchUnquotePath(u)) == NULL) {
fetch_syserr();
return -1;
}
dir = opendir(path);
free(path);
if (dir == NULL) {
fetch_syserr();
return -1;
}
ret = 0;
while ((de = readdir(dir)) != NULL) {
if (pattern && fnmatch(pattern, de->d_name, 0) != 0)
continue;
ret = fetch_add_entry(ue, u, de->d_name, 0);
if (ret)
break;
}
closedir(dir);
return ret;
}

1310
libfetch/ftp.c Normal file

File diff suppressed because it is too large Load Diff

48
libfetch/ftp.errors Normal file
View File

@ -0,0 +1,48 @@
# $NetBSD: ftp.errors,v 1.2 2008/10/06 12:58:29 joerg Exp $
# $FreeBSD: ftp.errors,v 1.6 2002/10/30 06:06:16 des Exp $
#
# This list is taken from RFC 959.
# It probably needs a going over.
#
110 OK Restart marker reply
120 TEMP Service ready in a few minutes
125 OK Data connection already open; transfer starting
150 OK File status okay; about to open data connection
200 OK Command okay
202 PROTO Command not implemented, superfluous at this site
211 INFO System status, or system help reply
212 INFO Directory status
213 INFO File status
214 INFO Help message
215 INFO Set system type
220 OK Service ready for new user
221 OK Service closing control connection
225 OK Data connection open; no transfer in progress
226 OK Requested file action successful
227 OK Entering Passive Mode
229 OK Entering Extended Passive Mode
230 OK User logged in, proceed
250 OK Requested file action okay, completed
257 OK File/directory created
331 AUTH User name okay, need password
332 AUTH Need account for login
350 OK Requested file action pending further information
421 DOWN Service not available, closing control connection
425 NETWORK Can't open data connection
426 ABORT Connection closed; transfer aborted
450 UNAVAIL File unavailable (e.g., file busy)
451 SERVER Requested action aborted: local error in processing
452 FULL Insufficient storage space in system
500 PROTO Syntax error, command unrecognized
501 PROTO Syntax error in parameters or arguments
502 PROTO Command not implemented
503 PROTO Bad sequence of commands
504 PROTO Command not implemented for that parameter
530 AUTH Not logged in
532 AUTH Need account for storing files
535 PROTO Bug in MediaHawk Video Kernel FTP server
550 UNAVAIL File unavailable (e.g., file not found, no access)
551 PROTO Requested action aborted. Page type unknown
552 FULL Exceeded storage allocation
553 EXISTS File name not allowed
999 PROTO Protocol error

1552
libfetch/http.c Normal file

File diff suppressed because it is too large Load Diff

46
libfetch/http.errors Normal file
View File

@ -0,0 +1,46 @@
# $FreeBSD: http.errors,v 1.5 2001/05/23 18:52:02 des Exp $
# $NetBSD: http.errors,v 1.3 2009/02/05 16:59:45 joerg Exp $
#
# This list is taken from RFC 2068.
#
100 OK Continue
101 OK Switching Protocols
200 OK OK
201 OK Created
202 OK Accepted
203 INFO Non-Authoritative Information
204 OK No Content
205 OK Reset Content
206 OK Partial Content
300 MOVED Multiple Choices
301 MOVED Moved Permanently
302 MOVED Moved Temporarily
303 MOVED See Other
304 UNCHANGED Not Modified
305 INFO Use Proxy
307 MOVED Temporary Redirect
400 PROTO Bad Request
401 AUTH Unauthorized
402 AUTH Payment Required
403 AUTH Forbidden
404 UNAVAIL Not Found
405 PROTO Method Not Allowed
406 PROTO Not Acceptable
407 AUTH Proxy Authentication Required
408 TIMEOUT Request Time-out
409 EXISTS Conflict
410 UNAVAIL Gone
411 PROTO Length Required
412 SERVER Precondition Failed
413 PROTO Request Entity Too Large
414 PROTO Request-URI Too Large
415 PROTO Unsupported Media Type
416 UNAVAIL Requested Range Not Satisfiable
417 SERVER Expectation Failed
500 SERVER Internal Server Error
501 PROTO Not Implemented
502 SERVER Bad Gateway
503 TEMP Service Unavailable
504 TIMEOUT Gateway Time-out
505 PROTO HTTP Version not supported
999 PROTO Protocol error