Since kernel 2.6.22, Linux provides an additional, nonstandard synchronization mechanism via the eventfd() system call.

  1. This system call creates an eventfd object that has an associated 8-byte unsigned integer maintained by the kernel.
  2. The system call returns a file descriptor that refers to the object.
  3. Writing an integer to this file descriptor adds that integer to the object’s value.
  4. A read() from the file descriptor blocks if the object’s value is 0. If the object has a nonzero value, a read() returns that value and resets it to 0.
  5. In addition, poll(), select(), or epoll can be used to test whether the object has a nonzero value; if it does, the file descriptor is reported as readable.
  6. An application that wishes to use an eventfd object for synchronization must first create the object using eventfd(), and then call fork() to create related processes that inherit file descriptors referring to the object.

可利用eventfd来提供代码的同步机制。eventfd() creates an “eventfd object” that can be used as an event wait/notify mechanism by user-space applications, and by the kernel to notify user-space applications of events. The object contains an unsigned 64-bit integer (uint64_t) counter that is maintained by the kernel. This counter is initialized with the value specified in the argument initval.


#include <sys/eventfd.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>             /* Definition of uint64_t */

/* Report msg together with the current errno text, then exit with failure. */
#define handle_error(msg)                           \
  do { perror(msg); exit(EXIT_FAILURE); } while (0)

/*
 * eventfd demonstration: a child process writes every command-line number
 * to a shared eventfd object and the parent reads the accumulated counter.
 *
 * argv[1..] are parsed with strtoull() base 0, so decimal, octal (leading 0)
 * and hexadecimal (leading 0x) inputs are all accepted.
 */
int main(int argc, char *argv[])
{
  int efd, j;
  uint64_t u;
  ssize_t s;

  if (argc < 2) {
    fprintf(stderr, "Usage: %s <num>...\n", argv[0]);
    exit(EXIT_FAILURE);
  }

  /* Counter starts at 0 with default flags: blocking, non-semaphore reads. */
  efd = eventfd(0, 0);
  if (efd == -1)
    handle_error("eventfd");

  switch (fork()) {
  case 0:
    /* Child: every write adds its 8-byte value to the kernel counter. */
    for (j = 1; j < argc; j++) {
      printf("Child writing %s to efd\n", argv[j]);
      u = strtoull(argv[j], NULL, 0);
      /* strtoull() allows various bases */
      s = write(efd, &u, sizeof(uint64_t));
      if (s != sizeof(uint64_t))
        handle_error("write");
    }
    printf("Child completed write loop\n");

    exit(EXIT_SUCCESS);

  default:
    /*
     * The man-page version calls sleep(2) here so the child can finish all
     * of its writes first. It is left disabled: read() then blocks only
     * until the counter first becomes nonzero, so the parent may observe a
     * partial sum rather than the total.
     */
    printf("Parent about to read\n");
    s = read(efd, &u, sizeof(uint64_t));
    if (s != sizeof(uint64_t))
      handle_error("read");
    printf("Parent read %llu (0x%llx) from efd\n",
           (unsigned long long) u, (unsigned long long) u);

    exit(EXIT_SUCCESS);

  case -1:
    handle_error("fork");
  }
}


 $ ./test 1 2 3 5                                                                                                            [0:08:52]
Child writing 1 to efd
Child writing 2 to efd
Child writing 3 to efd
Child writing 5 to efd
Child completed write loop
Parent about to read
Parent read 11 (0xb) from efd


/*
 * Semaphore implemented on top of an eventfd object.
 * The functions below refer to this type through a `semaphore_t` typedef
 * that is declared outside this excerpt.
 */
struct semaphore_t {
  int fd;  /* descriptor stored by semaphore_new() from eventfd() */
};

/*
 * Creates a semaphore whose eventfd counter starts at `value`.
 *
 * The descriptor is opened with EFD_SEMAPHORE, so each successful read
 * decrements the counter by one instead of resetting it to zero.
 *
 * Returns the new semaphore, or NULL if either the memory allocation or the
 * eventfd() call fails. Release the result with semaphore_free().
 */
semaphore_t *semaphore_new(unsigned int value) {
  semaphore_t *ret = malloc(sizeof *ret);
  if (!ret)
    return NULL;

  ret->fd = eventfd(value, EFD_SEMAPHORE);
  /* INVALID_FD is defined outside this excerpt; presumably -1 -- confirm. */
  if (ret->fd == INVALID_FD) {
    free(ret);  /* do not leak the wrapper when no descriptor was created */
    ret = NULL;
  }
  return ret;
}

/*
 * Destroys a semaphore obtained from semaphore_new().
 *
 * Closes the underlying descriptor when it is valid and releases the
 * structure itself. Passing NULL is a no-op.
 */
void semaphore_free(semaphore_t *semaphore) {
  if (!semaphore)
    return;

  if (semaphore->fd != INVALID_FD)
    close(semaphore->fd);
  free(semaphore);
}

/*
 * Waits on the semaphore by reading from its eventfd descriptor.
 *
 * With a blocking descriptor the read does not return until the counter is
 * nonzero. A failed read is only reported on stdout; the caller is not
 * informed.
 */
void semaphore_wait(semaphore_t *semaphore) {
  assert(semaphore != NULL);
  assert(semaphore->fd != INVALID_FD);

  eventfd_t value;
  if (eventfd_read(semaphore->fd, &value) == -1) {
    printf("%s unable to wait on semaphore: %s", __func__, strerror(errno));
  }
}

/*
 * Attempts to take the semaphore without blocking.
 *
 * The descriptor is switched to O_NONBLOCK for the duration of one read and
 * its previous flags are restored afterwards, whether or not the read
 * succeeded.
 *
 * Returns true if the read succeeded, false if it failed or if the
 * descriptor flags could not be queried or changed.
 */
bool semaphore_try_wait(semaphore_t *semaphore) {
  assert(semaphore != NULL);
  assert(semaphore->fd != INVALID_FD);

  int flags = fcntl(semaphore->fd, F_GETFL);
  if (flags == -1) {
    printf("%s unable to get flags for semaphore fd: %s", __func__, strerror(errno));
    return false;
  }
  if (fcntl(semaphore->fd, F_SETFL, flags | O_NONBLOCK) == -1) {
    printf("%s unable to set O_NONBLOCK for semaphore fd: %s", __func__, strerror(errno));
    return false;
  }

  bool rc = true;
  eventfd_t value;
  if (eventfd_read(semaphore->fd, &value) == -1) {
    rc = false;
  }

  /* Put the original flags back so later waits use the caller's mode again. */
  if (fcntl(semaphore->fd, F_SETFL, flags) == -1) {
    printf("%s unable to restore flags for semaphore fd: %s", __func__, strerror(errno));
  }
  return rc;
}

/*
 * Posts to the semaphore by writing 1 to its eventfd descriptor, which adds
 * one to the counter. A failed write is only reported on stdout.
 */
void semaphore_post(semaphore_t *semaphore) {
  assert(semaphore != NULL);
  assert(semaphore->fd != INVALID_FD);

  if (eventfd_write(semaphore->fd, 1ULL) == -1) {
    printf("%s unable to post to semaphore: %s", __func__, strerror(errno));
  }
}

/*
 * Returns the eventfd descriptor stored in the semaphore.
 * The semaphore must be non-NULL and hold a valid descriptor; both are
 * checked with assert().
 */
int semaphore_get_fd(const semaphore_t *semaphore) {
  assert(semaphore != NULL);
  assert(semaphore->fd != INVALID_FD);
  return semaphore->fd;
}

当 eventfd() 的第二个参数为 EFD_SEMAPHORE 时,eventfd 对象就具有了信号量(semaphore)的语义:调用 read() 时,如果对象的值不为 0,则读到的值为 1,并且与对象关联的整数值减 1;如果对象的值为 0,read() 会阻塞(非阻塞模式下则以 EAGAIN 失败)。


semaphore_t *semaphore = semaphore_new(0);
semaphore_try_wait(semaphore);  // returns false: the counter is still 0
semaphore_post(semaphore);      // counter becomes 1
semaphore_try_wait(semaphore);  // returns true: the counter drops back to 0