add cat blog
parent
a32c5c9243
commit
e4995ef758
|
@ -0,0 +1,336 @@
|
|||
---
|
||||
title: 'How efficient can cat(1) be?'
|
||||
date: '2022-07-17'
|
||||
---
|
||||
|
||||
There have been a few initiatives in recent years to implement
|
||||
a new userspace base system for Linux distributions as an
|
||||
alternative to the GNU coreutils and BusyBox. Recently, one of
|
||||
the authors of one of these proposed implementations made the
|
||||
pitch in a few IRC channels [that her cat implementation][lc],
|
||||
which was derived from OpenBSD’s implementation, was the most
|
||||
efficient. But is it actually?
|
||||
|
||||
[lc]: https://vimuser.org/cat.c.txt
|
||||
|
||||
## Understanding what `cat` actually does
|
||||
|
||||
At the most basic level, `cat` takes one or more files and
|
||||
dumps them to `stdout`. But do we need to actually use `stdio`
|
||||
for this? Actually, we don’t, and most competent `cat`
|
||||
implementations at least use `read(2)` and `write(2)` if not
|
||||
more advanced approaches.
|
||||
|
||||
If we consider `cat` as a form of buffer copy between an
|
||||
arbitrary file descriptor and `STDOUT_FILENO`, we can understand
|
||||
what the most efficient strategy to use for `cat` would be: splicing.
|
||||
Anything which isn’t doing splicing, after all, involves unnecessary
|
||||
buffer copies, and thus cannot be the most efficient.
|
||||
|
||||
To get the best performance out of spliced I/O, we have to have
|
||||
some prerequisites:
|
||||
|
||||
* The source and destination file descriptors should be unbuffered.
|
||||
|
||||
* Any intermediate buffer should be a multiple of the filesystem
|
||||
block size. In general, to avoid doing a `stat` syscall, we can
|
||||
assume that a multiple of `PAGE_SIZE` is likely acceptable.
|
||||
|
||||
## A simple `cat` implementation
|
||||
|
||||
The simplest way to implement `cat` is the way that it is done in
|
||||
BSD: using `read` and `write` on an intermediate buffer. This
|
||||
results in two buffer copies, but has the best portability. Most
|
||||
implementations of `cat` work this way, as it generally offers
|
||||
good enough performance.
|
||||
|
||||
```c
|
||||
/* This program is released into the public domain. */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
|
||||
void dumpfile(const char *path)
|
||||
{
|
||||
int srcfd = STDIN_FILENO;
|
||||
char buf[PAGE_SIZE * 16];
|
||||
ssize_t nread, nwritten;
|
||||
size_t offset;
|
||||
|
||||
/* POSIX allows - to represent stdin. */
|
||||
if (*path != '-')
|
||||
{
|
||||
srcfd = open(path, O_RDONLY);
|
||||
if (srcfd < 0)
|
||||
err(EXIT_FAILURE, "open %s", path);
|
||||
}
|
||||
|
||||
while ((nread = read(srcfd, buf, sizeof buf)) >= 1)
|
||||
{
|
||||
for (offset = 0; nread > 0; nread -= nwritten, offset += nwritten)
|
||||
{
|
||||
if ((nwritten = write(STDOUT_FILENO, buf + offset, nread)) <= 0)
|
||||
err(EXIT_FAILURE, "write stdout");
|
||||
}
|
||||
}
|
||||
|
||||
if (srcfd != STDIN_FILENO)
|
||||
(void) close(srcfd);
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
dumpfile(argv[i]);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
```
|
||||
|
||||
## Implementing spliced I/O
|
||||
|
||||
Linux has no shortage of ways to perform spliced I/O. For our `cat`
|
||||
implementation, we have two possible ways to do it.
|
||||
|
||||
The first possible option is the venerable `sendfile` syscall, which
|
||||
was [originally added to improve the file serving performance of web
|
||||
servers][sf-origin]. Originally, `sendfile` required the destination
|
||||
file descriptor to be a socket, but this restriction was removed in
|
||||
Linux 2.6.33. Unfortunately, sendfile is not perfect: because it
|
||||
only supports file descriptors which can be memory mapped, we
|
||||
must use a different strategy when using copying from `stdin`.
|
||||
|
||||
```c
|
||||
/* This program is released into the public domain. */
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/sendfile.h>
|
||||
|
||||
bool spliced_copy(int srcfd)
|
||||
{
|
||||
ssize_t nwritten;
|
||||
off_t offset = 0;
|
||||
|
||||
do
|
||||
{
|
||||
nwritten = sendfile(STDOUT_FILENO, srcfd, &offset,
|
||||
PAGE_SIZE * 16);
|
||||
if (nwritten < 0)
|
||||
return false;
|
||||
} while (nwritten > 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void copy(int srcfd)
|
||||
{
|
||||
char buf[PAGE_SIZE * 16];
|
||||
size_t nread, nwritten, offset;
|
||||
|
||||
while ((nread = read(srcfd, buf, sizeof buf)) >= 1)
|
||||
{
|
||||
for (offset = 0; nread > 0;
|
||||
nread -= nwritten, offset += nwritten)
|
||||
{
|
||||
if ((nwritten = write(STDOUT_FILENO,
|
||||
buf + offset, nread)) <= 0)
|
||||
err(EXIT_FAILURE, "write stdout");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dumpfile(const char *path)
|
||||
{
|
||||
int srcfd = STDIN_FILENO;
|
||||
char buf[PAGE_SIZE * 16];
|
||||
|
||||
/* POSIX allows - to represent stdin. */
|
||||
if (*path != '-')
|
||||
{
|
||||
srcfd = open(path, O_RDONLY);
|
||||
if (srcfd < 0)
|
||||
err(EXIT_FAILURE, "open %s", path);
|
||||
}
|
||||
|
||||
/* Fall back to traditional copy if the spliced version fails. */
|
||||
if (!spliced_copy(srcfd))
|
||||
copy(srcfd);
|
||||
|
||||
if (srcfd != STDIN_FILENO)
|
||||
(void) close(srcfd);
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int i;
|
||||
int stdout_flags;
|
||||
|
||||
stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
|
||||
if (stdout_flags < 0)
|
||||
err(EXIT_FAILURE, "fcntl(STDOUT_FILENO, F_GETFL)");
|
||||
stdout_flags &= ~O_APPEND;
|
||||
if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) < 0)
|
||||
err(EXIT_FAILURE, "fcntl(STDOUT_FILENO, F_SETFL)");
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
dumpfile(argv[i]);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
```
|
||||
|
||||
Another approach is to use `splice` and a pipe. This allows for true
|
||||
zero-copy I/O in userspace, as a pipe is simply implemented as a 64KB
|
||||
ring buffer in the kernel. In this case, we just use two splice
|
||||
operations per block of data we want to copy: one to move the data to
|
||||
the pipe and another to move the data from the pipe to the output file.
|
||||
|
||||
```c
|
||||
/* This program is released into the public domain. */
|
||||
#define _GNU_SOURCE
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <limits.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/sendfile.h>
|
||||
|
||||
#define BLOCK_SIZE ((PAGE_SIZE * 16) - 1)
|
||||
|
||||
bool spliced_copy(int srcfd)
|
||||
{
|
||||
int pipefd[2];
|
||||
ssize_t nread, nwritten;
|
||||
off_t in_offset = 0;
|
||||
bool ret = true;
|
||||
|
||||
if (pipe(pipefd) < 0)
|
||||
err(EXIT_FAILURE, "pipe");
|
||||
|
||||
do
|
||||
{
|
||||
nread = splice(srcfd, &in_offset, pipefd[1], NULL,
|
||||
BLOCK_SIZE, SPLICE_F_MOVE | SPLICE_F_MORE);
|
||||
if (nread <= 0)
|
||||
{
|
||||
ret = nread < 0 ? false : true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nwritten = splice(pipefd[0], NULL, STDOUT_FILENO, NULL,
|
||||
BLOCK_SIZE, SPLICE_F_MOVE | SPLICE_F_MORE);
|
||||
if (nwritten < 0)
|
||||
{
|
||||
ret = false;
|
||||
goto out;
|
||||
}
|
||||
} while (nwritten > 0);
|
||||
|
||||
out:
|
||||
close(pipefd[0]);
|
||||
close(pipefd[1]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void copy(int srcfd)
|
||||
{
|
||||
char buf[PAGE_SIZE * 16];
|
||||
size_t nread, nwritten, offset;
|
||||
|
||||
while ((nread = read(srcfd, buf, sizeof buf)) >= 1)
|
||||
{
|
||||
for (offset = 0; nread > 0;
|
||||
nread -= nwritten, offset += nwritten)
|
||||
{
|
||||
if ((nwritten = write(STDOUT_FILENO,
|
||||
buf + offset, nread)) <= 0)
|
||||
err(EXIT_FAILURE, "write stdout");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void dumpfile(const char *path)
|
||||
{
|
||||
int srcfd = STDIN_FILENO;
|
||||
char buf[PAGE_SIZE * 16];
|
||||
|
||||
/* POSIX allows - to represent stdin. */
|
||||
if (*path != '-')
|
||||
{
|
||||
srcfd = open(path, O_RDONLY);
|
||||
if (srcfd < 0)
|
||||
err(EXIT_FAILURE, "open %s", path);
|
||||
|
||||
(void) posix_fadvise(srcfd, 0, 0, POSIX_FADV_SEQUENTIAL);
|
||||
}
|
||||
|
||||
/* Fall back to traditional copy if the spliced version fails. */
|
||||
if (!spliced_copy(srcfd))
|
||||
copy(srcfd);
|
||||
|
||||
if (srcfd != STDIN_FILENO)
|
||||
(void) close(srcfd);
|
||||
}
|
||||
|
||||
int main(int argc, const char *argv[])
|
||||
{
|
||||
int i;
|
||||
int stdout_flags;
|
||||
|
||||
stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
|
||||
if (stdout_flags < 0)
|
||||
err(EXIT_FAILURE, "fcntl(STDOUT_FILENO, F_GETFL)");
|
||||
stdout_flags &= ~O_APPEND;
|
||||
if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) < 0)
|
||||
err(EXIT_FAILURE, "fcntl(STDOUT_FILENO, F_SETFL)");
|
||||
|
||||
for (i = 1; i < argc; i++)
|
||||
dumpfile(argv[i]);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
```
|
||||
|
||||
## Honorable mention: `copy_file_range`
|
||||
|
||||
While `copy_file_range` is not really that relevant to a `cat`
|
||||
implementation, if both the source and output files are normal
|
||||
files, you can use it to get even faster performance than using
|
||||
splice, as the kernel handles all of the details on its own. An
|
||||
optimized `cat` might try this strategy and then downgrade to
|
||||
`splice`, `sendfile`, and the normal `read` and `write` loop.
|
||||
|
||||
## Performance comparison
|
||||
|
||||
To measure the performance of each strategy, we can simply use
|
||||
`dd` as a sink, running each cat program piped into
|
||||
`dd of=/dev/null bs=64K iflag=fullblock`.
|
||||
|
||||
The runs in the table below are averaged across 1000 runs on a
|
||||
8GB RAM Linode, using a 4GB file in tmpfs.
|
||||
|
||||
| Strategy | Throughput |
|
||||
|----------------------------------------|------------|
|
||||
| `cat-simple` (`read` and `write` loop) | 3.6 GB/s |
|
||||
| `cat-sendfile` | 6.4 GB/s |
|
||||
| `cat-splice` | 11.6 GB/s |
|
||||
|
||||
If you are interested in using these implementations in your own
|
||||
`cat` implementation, you may do so under any license terms you
|
||||
wish.
|
Loading…
Reference in New Issue