/*
 * Server-side file descriptor management
 *
 * Copyright (C) 2000, 2003 Alexandre Julliard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */


#include "config.h"
#include "wine/port.h"

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#ifdef HAVE_POLL_H
#include <poll.h>
#endif
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#endif
#ifdef HAVE_LINUX_MAJOR_H
#include <linux/major.h>
#endif
#ifdef HAVE_SYS_STATVFS_H
#include <sys/statvfs.h>
#endif
#ifdef HAVE_SYS_VFS_H
/* Work around a conflict with Solaris' system list defined in sys/list.h. */
#define list SYSLIST
#define list_next SYSLIST_NEXT
#define list_prev SYSLIST_PREV
#define list_head SYSLIST_HEAD
#define list_tail SYSLIST_TAIL
#define list_move_tail SYSLIST_MOVE_TAIL
#define list_remove SYSLIST_REMOVE
#include <sys/vfs.h>
#undef list
#undef list_next
#undef list_prev
#undef list_head
#undef list_tail
#undef list_move_tail
#undef list_remove
#endif
#ifdef HAVE_SYS_PARAM_H
#include <sys/param.h>
#endif
#ifdef HAVE_SYS_MOUNT_H
#include <sys/mount.h>
#endif
#ifdef HAVE_SYS_STATFS_H
#include <sys/statfs.h>
#endif
#ifdef HAVE_SYS_SYSCTL_H
#include <sys/sysctl.h>
#endif
#ifdef HAVE_SYS_EVENT_H
#include <sys/event.h>
#undef LIST_INIT
#undef LIST_ENTRY
#endif
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#include <sys/stat.h>
#include <sys/time.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#elif defined(MAJOR_IN_SYSMACROS)
#include <sys/sysmacros.h>
#endif
#include <sys/types.h>
#include <unistd.h>
#ifdef HAVE_SYS_SYSCALL_H
#include <sys/syscall.h>
#endif

#include "ntstatus.h"
#define WIN32_NO_STATUS
#include "object.h"
#include "file.h"
#include "handle.h"
#include "process.h"
#include "request.h"

#include "winternl.h"
#include "winioctl.h"

#if defined(HAVE_SYS_EPOLL_H) && defined(HAVE_EPOLL_CREATE)
# include <sys/epoll.h>
# define USE_EPOLL
#elif defined(linux) && defined(__i386__) && defined(HAVE_STDINT_H)
# define USE_EPOLL
# define EPOLLIN POLLIN
# define EPOLLOUT POLLOUT
# define EPOLLERR POLLERR
# define EPOLLHUP POLLHUP
# define EPOLL_CTL_ADD 1
# define EPOLL_CTL_DEL 2
# define EPOLL_CTL_MOD 3

typedef union epoll_data
{
  void *ptr;
  int fd;
  uint32_t u32;
  uint64_t u64;
} epoll_data_t;

struct epoll_event
{
  uint32_t events;
  epoll_data_t data;
};

/* libc has no epoll support on this platform: issue the i386 syscalls directly */
static inline int epoll_create( int size )
{
    return syscall( 254 /*NR_epoll_create*/, size );
}

static inline int epoll_ctl( int epfd, int op, int fd, const struct epoll_event *event )
{
    return syscall( 255 /*NR_epoll_ctl*/, epfd, op, fd, event );
}

static inline int epoll_wait( int epfd, struct epoll_event *events, int maxevents, int timeout )
{
    return syscall( 256 /*NR_epoll_wait*/, epfd, events, maxevents, timeout );
}

#endif /* linux && __i386__ && HAVE_STDINT_H */

#if defined(HAVE_PORT_H) && defined(HAVE_PORT_CREATE)
# include <port.h>
# define USE_EVENT_PORTS
#endif /* HAVE_PORT_H && HAVE_PORT_CREATE */

157 158 159 160 161
/* Because of the stupid Posix locking semantics, we need to keep
 * track of all file descriptors referencing a given file, and not
 * close a single one until all the locks are gone (sigh).
 */

162 163 164 165 166
/* file descriptor object */

/* closed_fd is used to keep track of the unix fd belonging to a closed fd object */
struct closed_fd
{
167
    struct list entry;       /* entry in inode closed list */
168
    int         unix_fd;     /* the unix file descriptor */
169 170
    int         unlink;      /* whether to unlink on close */
    char       *unix_name;   /* name to unlink on close, points to parent fd unix_name */
171 172
};

173 174
struct fd
{
175 176 177 178 179 180
    struct object        obj;         /* object header */
    const struct fd_ops *fd_ops;      /* file descriptor operations */
    struct inode        *inode;       /* inode that this fd belongs to */
    struct list          inode_entry; /* entry in inode fd list */
    struct closed_fd    *closed;      /* structure to store the unix fd at destroy time */
    struct object       *user;        /* object using this file descriptor */
181
    struct list          locks;       /* list of locks on this fd */
182
    unsigned int         access;      /* file access (FILE_READ_DATA etc.) */
183
    unsigned int         options;     /* file options (FILE_DELETE_ON_CLOSE, FILE_SYNCHRONOUS...) */
184
    unsigned int         sharing;     /* file sharing mode */
185
    char                *unix_name;   /* unix file name */
186
    int                  unix_fd;     /* unix file descriptor */
187
    unsigned int         no_fd_status;/* status to return when unix_fd is -1 */
188
    unsigned int         cacheable :1;/* can the fd be cached on the client side? */
189 190
    unsigned int         signaled :1; /* is the fd signaled? */
    unsigned int         fs_locks :1; /* can we use filesystem locks for this fd? */
191
    int                  poll_index;  /* index of fd in poll array */
192 193 194
    struct async_queue  *read_q;      /* async readers of this fd */
    struct async_queue  *write_q;     /* async writers of this fd */
    struct async_queue  *wait_q;      /* other async waiters of this fd */
195
    struct completion   *completion;  /* completion object attached to this fd */
196
    apc_param_t          comp_key;    /* completion key to set in completion events */
197 198 199 200 201 202 203 204 205
};

static void fd_dump( struct object *obj, int verbose );
static void fd_destroy( struct object *obj );

static const struct object_ops fd_ops =
{
    sizeof(struct fd),        /* size */
    fd_dump,                  /* dump */
206
    no_get_type,              /* get_type */
207 208 209 210
    no_add_queue,             /* add_queue */
    NULL,                     /* remove_queue */
    NULL,                     /* signaled */
    NULL,                     /* satisfied */
211
    no_signal,                /* signal */
212
    no_get_fd,                /* get_fd */
213
    no_map_access,            /* map_access */
214 215
    default_get_sd,           /* get_sd */
    default_set_sd,           /* set_sd */
216
    no_lookup_name,           /* lookup_name */
217 218
    no_link_name,             /* link_name */
    NULL,                     /* unlink_name */
219
    no_open_file,             /* open_file */
220
    no_close_handle,          /* close_handle */
221 222 223
    fd_destroy                /* destroy */
};

224 225 226 227 228 229 230 231 232 233
/* device object */

#define DEVICE_HASH_SIZE 7
#define INODE_HASH_SIZE 17

struct device
{
    struct object       obj;        /* object header */
    struct list         entry;      /* entry in device hash list */
    dev_t               dev;        /* device number */
234
    int                 removable;  /* removable device? (or -1 if unknown) */
235 236 237 238 239 240 241 242 243 244
    struct list         inode_hash[INODE_HASH_SIZE];  /* inodes hash table */
};

static void device_dump( struct object *obj, int verbose );
static void device_destroy( struct object *obj );

static const struct object_ops device_ops =
{
    sizeof(struct device),    /* size */
    device_dump,              /* dump */
245
    no_get_type,              /* get_type */
246 247 248 249 250 251
    no_add_queue,             /* add_queue */
    NULL,                     /* remove_queue */
    NULL,                     /* signaled */
    NULL,                     /* satisfied */
    no_signal,                /* signal */
    no_get_fd,                /* get_fd */
252
    no_map_access,            /* map_access */
253 254
    default_get_sd,           /* get_sd */
    default_set_sd,           /* set_sd */
255
    no_lookup_name,           /* lookup_name */
256 257
    no_link_name,             /* link_name */
    NULL,                     /* unlink_name */
258
    no_open_file,             /* open_file */
259 260 261 262
    no_close_handle,          /* close_handle */
    device_destroy            /* destroy */
};

263 264 265 266 267 268
/* inode object */

struct inode
{
    struct object       obj;        /* object header */
    struct list         entry;      /* inode hash list entry */
269
    struct device      *device;     /* device containing this inode */
270 271
    ino_t               ino;        /* inode number */
    struct list         open;       /* list of open file descriptors */
272
    struct list         locks;      /* list of file locks */
273
    struct list         closed;     /* list of file descriptors to close at destroy time */
274 275 276 277 278 279 280 281 282
};

static void inode_dump( struct object *obj, int verbose );
static void inode_destroy( struct object *obj );

static const struct object_ops inode_ops =
{
    sizeof(struct inode),     /* size */
    inode_dump,               /* dump */
283
    no_get_type,              /* get_type */
284 285 286 287
    no_add_queue,             /* add_queue */
    NULL,                     /* remove_queue */
    NULL,                     /* signaled */
    NULL,                     /* satisfied */
288
    no_signal,                /* signal */
289
    no_get_fd,                /* get_fd */
290
    no_map_access,            /* map_access */
291 292
    default_get_sd,           /* get_sd */
    default_set_sd,           /* set_sd */
293
    no_lookup_name,           /* lookup_name */
294 295
    no_link_name,             /* link_name */
    NULL,                     /* unlink_name */
296
    no_open_file,             /* open_file */
297
    no_close_handle,          /* close_handle */
298 299 300
    inode_destroy             /* destroy */
};

301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
/* file lock object */

struct file_lock
{
    struct object       obj;         /* object header */
    struct fd          *fd;          /* fd owning this lock */
    struct list         fd_entry;    /* entry in list of locks on a given fd */
    struct list         inode_entry; /* entry in inode list of locks */
    int                 shared;      /* shared lock? */
    file_pos_t          start;       /* locked region is interval [start;end) */
    file_pos_t          end;
    struct process     *process;     /* process owning this lock */
    struct list         proc_entry;  /* entry in list of locks owned by the process */
};

static void file_lock_dump( struct object *obj, int verbose );
317
static int file_lock_signaled( struct object *obj, struct wait_queue_entry *entry );
318 319 320 321 322

static const struct object_ops file_lock_ops =
{
    sizeof(struct file_lock),   /* size */
    file_lock_dump,             /* dump */
323
    no_get_type,                /* get_type */
324 325 326 327
    add_queue,                  /* add_queue */
    remove_queue,               /* remove_queue */
    file_lock_signaled,         /* signaled */
    no_satisfied,               /* satisfied */
328
    no_signal,                  /* signal */
329
    no_get_fd,                  /* get_fd */
330
    no_map_access,              /* map_access */
331 332
    default_get_sd,             /* get_sd */
    default_set_sd,             /* set_sd */
333
    no_lookup_name,             /* lookup_name */
334 335
    no_link_name,               /* link_name */
    NULL,                       /* unlink_name */
336
    no_open_file,               /* open_file */
337
    no_close_handle,            /* close_handle */
338 339 340 341 342 343 344 345 346
    no_destroy                  /* destroy */
};


#define OFF_T_MAX       (~((file_pos_t)1 << (8*sizeof(off_t)-1)))
#define FILE_POS_T_MAX  (~(file_pos_t)0)

static file_pos_t max_unix_offset = OFF_T_MAX;

/* print a 64-bit value in hex, splitting it in two halves when unsigned long is only 32 bits */
#define DUMP_LONG_LONG(val) do { \
    if (sizeof(val) > sizeof(unsigned long) && (val) > ~0UL) \
        fprintf( stderr, "%lx%08lx", (unsigned long)((unsigned long long)(val) >> 32), (unsigned long)(val) ); \
    else \
        fprintf( stderr, "%lx", (unsigned long)(val) ); \
  } while (0)


355

356 357 358 359 360
/****************************************************************/
/* timeouts support */

struct timeout_user
{
361
    struct list           entry;      /* entry in sorted timeout list */
362
    timeout_t             when;       /* timeout expiry (absolute time) */
363 364 365 366
    timeout_callback      callback;   /* callback function */
    void                 *private;    /* callback private data */
};

367
static struct list timeout_list = LIST_INIT(timeout_list);   /* sorted timeouts list */
368 369 370 371 372 373 374 375 376
timeout_t current_time;

static inline void set_current_time(void)
{
    static const timeout_t ticks_1601_to_1970 = (timeout_t)86400 * (369 * 365 + 89) * TICKS_PER_SEC;
    struct timeval now;
    gettimeofday( &now, NULL );
    current_time = (timeout_t)now.tv_sec * TICKS_PER_SEC + now.tv_usec * 10 + ticks_1601_to_1970;
}
377 378

/* add a timeout user */
379
struct timeout_user *add_timeout_user( timeout_t when, timeout_callback func, void *private )
380 381
{
    struct timeout_user *user;
382
    struct list *ptr;
383 384

    if (!(user = mem_alloc( sizeof(*user) ))) return NULL;
385
    user->when     = (when > 0) ? when : current_time - when;
386 387 388 389 390
    user->callback = func;
    user->private  = private;

    /* Now insert it in the linked list */

391
    LIST_FOR_EACH( ptr, &timeout_list )
392
    {
393
        struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
394
        if (timeout->when >= user->when) break;
395
    }
396
    list_add_before( ptr, &user->entry );
397 398 399 400 401 402
    return user;
}

/* remove a timeout user */
void remove_timeout_user( struct timeout_user *user )
{
403
    list_remove( &user->entry );
404 405 406
    free( user );
}

407 408
/* return a text description of a timeout for debugging purposes */
const char *get_timeout_str( timeout_t timeout )
409
{
410 411 412 413 414 415 416 417 418 419 420 421 422
    static char buffer[64];
    long secs, nsecs;

    if (!timeout) return "0";
    if (timeout == TIMEOUT_INFINITE) return "infinite";

    if (timeout < 0)  /* relative */
    {
        secs = -timeout / TICKS_PER_SEC;
        nsecs = -timeout % TICKS_PER_SEC;
        sprintf( buffer, "+%ld.%07ld", secs, nsecs );
    }
    else  /* absolute */
423
    {
424 425 426
        secs = (timeout - current_time) / TICKS_PER_SEC;
        nsecs = (timeout - current_time) % TICKS_PER_SEC;
        if (nsecs < 0)
427
        {
428 429
            nsecs += TICKS_PER_SEC;
            secs--;
430
        }
431 432 433 434 435 436 437
        if (secs >= 0)
            sprintf( buffer, "%x%08x (+%ld.%07ld)",
                     (unsigned int)(timeout >> 32), (unsigned int)timeout, secs, nsecs );
        else
            sprintf( buffer, "%x%08x (-%ld.%07ld)",
                     (unsigned int)(timeout >> 32), (unsigned int)timeout,
                     -(secs + 1), TICKS_PER_SEC - nsecs );
438
    }
439
    return buffer;
440 441 442 443 444 445 446 447 448 449 450 451 452
}


/****************************************************************/
/* poll support */

static struct fd **poll_users;              /* users array */
static struct pollfd *pollfd;               /* poll fd array */
static int nb_users;                        /* count of array entries actually in use */
static int active_users;                    /* current number of active users */
static int allocated_users;                 /* count of allocated entries in the array */
static struct fd **freelist;                /* list of free entries in the array */

453 454
static int get_next_timeout(void);

455 456 457 458 459
static inline void fd_poll_event( struct fd *fd, int event )
{
    fd->fd_ops->poll_event( fd, event );
}

#ifdef USE_EPOLL

static int epoll_fd = -1;

static inline void init_epoll(void)
{
    epoll_fd = epoll_create( 128 );
}

/* set the events that epoll waits for on this fd; helper for set_fd_events */
static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
{
    struct epoll_event ev;
    int ctl;

    if (epoll_fd == -1) return;

    if (events == -1)  /* stop waiting on this fd completely */
    {
        if (pollfd[user].fd == -1) return;  /* already removed */
        ctl = EPOLL_CTL_DEL;
    }
    else if (pollfd[user].fd == -1)
    {
        if (pollfd[user].events) return;  /* stopped waiting on it, don't restart */
        ctl = EPOLL_CTL_ADD;
    }
    else
    {
        if (pollfd[user].events == events) return;  /* nothing to do */
        ctl = EPOLL_CTL_MOD;
    }

    ev.events = events;
    memset(&ev.data, 0, sizeof(ev.data));
    ev.data.u32 = user;

    if (epoll_ctl( epoll_fd, ctl, fd->unix_fd, &ev ) == -1)
    {
        if (errno == ENOMEM)  /* not enough memory, give up on epoll */
        {
            close( epoll_fd );
            epoll_fd = -1;
        }
        else perror( "epoll_ctl" );  /* should not happen */
    }
}

/* stop tracking an fd in the epoll set; helper for remove_poll_user */
static inline void remove_epoll_user( struct fd *fd, int user )
{
    if (epoll_fd == -1) return;

    if (pollfd[user].fd != -1)
    {
        struct epoll_event dummy;
        epoll_ctl( epoll_fd, EPOLL_CTL_DEL, fd->unix_fd, &dummy );
    }
}

/* epoll-based main loop; falls back to poll() by returning when epoll_fd is -1 */
static inline void main_loop_epoll(void)
{
    int i, ret, timeout;
    struct epoll_event events[128];

    assert( POLLIN == EPOLLIN );
    assert( POLLOUT == EPOLLOUT );
    assert( POLLERR == EPOLLERR );
    assert( POLLHUP == EPOLLHUP );

    if (epoll_fd == -1) return;

    while (active_users)
    {
        timeout = get_next_timeout();

        if (!active_users) break;  /* last user removed by a timeout */
        if (epoll_fd == -1) break;  /* an error occurred with epoll */

        ret = epoll_wait( epoll_fd, events, sizeof(events)/sizeof(events[0]), timeout );
        set_current_time();

        /* put the events into the pollfd array first, like poll does */
        for (i = 0; i < ret; i++)
        {
            int user = events[i].data.u32;
            pollfd[user].revents = events[i].events;
        }

        /* read events from the pollfd array, as set_fd_events may modify them */
        for (i = 0; i < ret; i++)
        {
            int user = events[i].data.u32;
            if (pollfd[user].revents) fd_poll_event( poll_users[user], pollfd[user].revents );
        }
    }
}

557 558 559 560 561 562
#elif defined(HAVE_KQUEUE)

static int kqueue_fd = -1;

static inline void init_epoll(void)
{
563 564 565 566 567 568 569 570 571
#ifdef __APPLE__ /* kqueue support is broken in Mac OS < 10.5 */
    int mib[2];
    char release[32];
    size_t len = sizeof(release);

    mib[0] = CTL_KERN;
    mib[1] = KERN_OSRELEASE;
    if (sysctl( mib, 2, release, &len, NULL, 0 ) == -1) return;
    if (atoi(release) < 9) return;
572
#endif
573
    kqueue_fd = kqueue();
574 575 576 577 578 579 580 581
}

static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
{
    struct kevent ev[2];

    if (kqueue_fd == -1) return;

582 583
    EV_SET( &ev[0], fd->unix_fd, EVFILT_READ, 0, NOTE_LOWAT, 1, (void *)(long)user );
    EV_SET( &ev[1], fd->unix_fd, EVFILT_WRITE, 0, NOTE_LOWAT, 1, (void *)(long)user );
584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652

    if (events == -1)  /* stop waiting on this fd completely */
    {
        if (pollfd[user].fd == -1) return;  /* already removed */
        ev[0].flags |= EV_DELETE;
        ev[1].flags |= EV_DELETE;
    }
    else if (pollfd[user].fd == -1)
    {
        if (pollfd[user].events) return;  /* stopped waiting on it, don't restart */
        ev[0].flags |= EV_ADD | ((events & POLLIN) ? EV_ENABLE : EV_DISABLE);
        ev[1].flags |= EV_ADD | ((events & POLLOUT) ? EV_ENABLE : EV_DISABLE);
    }
    else
    {
        if (pollfd[user].events == events) return;  /* nothing to do */
        ev[0].flags |= (events & POLLIN) ? EV_ENABLE : EV_DISABLE;
        ev[1].flags |= (events & POLLOUT) ? EV_ENABLE : EV_DISABLE;
    }

    if (kevent( kqueue_fd, ev, 2, NULL, 0, NULL ) == -1)
    {
        if (errno == ENOMEM)  /* not enough memory, give up on kqueue */
        {
            close( kqueue_fd );
            kqueue_fd = -1;
        }
        else perror( "kevent" );  /* should not happen */
    }
}

static inline void remove_epoll_user( struct fd *fd, int user )
{
    if (kqueue_fd == -1) return;

    if (pollfd[user].fd != -1)
    {
        struct kevent ev[2];

        EV_SET( &ev[0], fd->unix_fd, EVFILT_READ, EV_DELETE, 0, 0, 0 );
        EV_SET( &ev[1], fd->unix_fd, EVFILT_WRITE, EV_DELETE, 0, 0, 0 );
        kevent( kqueue_fd, ev, 2, NULL, 0, NULL );
    }
}

static inline void main_loop_epoll(void)
{
    int i, ret, timeout;
    struct kevent events[128];

    if (kqueue_fd == -1) return;

    while (active_users)
    {
        timeout = get_next_timeout();

        if (!active_users) break;  /* last user removed by a timeout */
        if (kqueue_fd == -1) break;  /* an error occurred with kqueue */

        if (timeout != -1)
        {
            struct timespec ts;

            ts.tv_sec = timeout / 1000;
            ts.tv_nsec = (timeout % 1000) * 1000000;
            ret = kevent( kqueue_fd, NULL, 0, events, sizeof(events)/sizeof(events[0]), &ts );
        }
        else ret = kevent( kqueue_fd, NULL, 0, events, sizeof(events)/sizeof(events[0]), NULL );

653
        set_current_time();
654

655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679
        /* put the events into the pollfd array first, like poll does */
        for (i = 0; i < ret; i++)
        {
            long user = (long)events[i].udata;
            pollfd[user].revents = 0;
        }
        for (i = 0; i < ret; i++)
        {
            long user = (long)events[i].udata;
            if (events[i].filter == EVFILT_READ) pollfd[user].revents |= POLLIN;
            else if (events[i].filter == EVFILT_WRITE) pollfd[user].revents |= POLLOUT;
            if (events[i].flags & EV_EOF) pollfd[user].revents |= POLLHUP;
            if (events[i].flags & EV_ERROR) pollfd[user].revents |= POLLERR;
        }

        /* read events from the pollfd array, as set_fd_events may modify them */
        for (i = 0; i < ret; i++)
        {
            long user = (long)events[i].udata;
            if (pollfd[user].revents) fd_poll_event( poll_users[user], pollfd[user].revents );
            pollfd[user].revents = 0;
        }
    }
}

#elif defined(USE_EVENT_PORTS)

static int port_fd = -1;

static inline void init_epoll(void)
{
    port_fd = port_create();
}

/* set the events that the event port waits for on this fd; helper for set_fd_events */
static inline void set_fd_epoll_events( struct fd *fd, int user, int events )
{
    int ret = 0;  /* initialized: the dissociate path below does not set it */

    if (port_fd == -1) return;

    if (events == -1)  /* stop waiting on this fd completely */
    {
        if (pollfd[user].fd == -1) return;  /* already removed */
        port_dissociate( port_fd, PORT_SOURCE_FD, fd->unix_fd );
    }
    else if (pollfd[user].fd == -1)
    {
        if (pollfd[user].events) return;  /* stopped waiting on it, don't restart */
        ret = port_associate( port_fd, PORT_SOURCE_FD, fd->unix_fd, events, (void *)user );
    }
    else
    {
        if (pollfd[user].events == events) return;  /* nothing to do */
        ret = port_associate( port_fd, PORT_SOURCE_FD, fd->unix_fd, events, (void *)user );
    }

    if (ret == -1)
    {
        if (errno == ENOMEM)  /* not enough memory, give up on port_associate */
        {
            close( port_fd );
            port_fd = -1;
        }
        else perror( "port_associate" );  /* should not happen */
    }
}

/* stop tracking an fd in the event port; helper for remove_poll_user */
static inline void remove_epoll_user( struct fd *fd, int user )
{
    if (port_fd == -1) return;

    if (pollfd[user].fd != -1)
    {
        port_dissociate( port_fd, PORT_SOURCE_FD, fd->unix_fd );
    }
}

/* event-ports-based main loop; falls back to poll() by returning when port_fd is -1 */
static inline void main_loop_epoll(void)
{
    int i, nget, ret, timeout;
    port_event_t events[128];

    if (port_fd == -1) return;

    while (active_users)
    {
        timeout = get_next_timeout();
        nget = 1;

        if (!active_users) break;  /* last user removed by a timeout */
        if (port_fd == -1) break;  /* an error occurred with event completion */

        if (timeout != -1)
        {
            struct timespec ts;

            ts.tv_sec = timeout / 1000;
            ts.tv_nsec = (timeout % 1000) * 1000000;
            ret = port_getn( port_fd, events, sizeof(events)/sizeof(events[0]), &nget, &ts );
        }
        else ret = port_getn( port_fd, events, sizeof(events)/sizeof(events[0]), &nget, NULL );

        if (ret == -1) break;  /* an error occurred with event completion */

        set_current_time();

        /* put the events into the pollfd array first, like poll does */
        for (i = 0; i < nget; i++)
        {
            long user = (long)events[i].portev_user;
            pollfd[user].revents = events[i].portev_events;
        }

        /* read events from the pollfd array, as set_fd_events may modify them */
        for (i = 0; i < nget; i++)
        {
            long user = (long)events[i].portev_user;
            if (pollfd[user].revents) fd_poll_event( poll_users[user], pollfd[user].revents );
            /* port events are one-shot: if we are still interested, reassociate the fd */
            if (pollfd[user].fd != -1) {
                port_associate( port_fd, PORT_SOURCE_FD, pollfd[user].fd, pollfd[user].events, (void *)user );
            }
        }
    }
}


#else /* HAVE_KQUEUE */

/* no fd-multiplexing backend available: the plain poll() loop is used instead */
static inline void init_epoll(void) { }
static inline void set_fd_epoll_events( struct fd *fd, int user, int events ) { }
static inline void remove_epoll_user( struct fd *fd, int user ) { }
static inline void main_loop_epoll(void) { }

#endif /* USE_EPOLL */


791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
/* add a user in the poll array and return its index, or -1 on failure */
static int add_poll_user( struct fd *fd )
{
    int ret;
    if (freelist)
    {
        ret = freelist - poll_users;
        freelist = (struct fd **)poll_users[ret];
    }
    else
    {
        if (nb_users == allocated_users)
        {
            struct fd **newusers;
            struct pollfd *newpoll;
            int new_count = allocated_users ? (allocated_users + allocated_users / 2) : 16;
            if (!(newusers = realloc( poll_users, new_count * sizeof(*poll_users) ))) return -1;
            if (!(newpoll = realloc( pollfd, new_count * sizeof(*pollfd) )))
            {
                if (allocated_users)
                    poll_users = newusers;
                else
                    free( newusers );
                return -1;
            }
            poll_users = newusers;
            pollfd = newpoll;
818
            if (!allocated_users) init_epoll();
819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835
            allocated_users = new_count;
        }
        ret = nb_users++;
    }
    pollfd[ret].fd = -1;
    pollfd[ret].events = 0;
    pollfd[ret].revents = 0;
    poll_users[ret] = fd;
    active_users++;
    return ret;
}

/* remove a user from the poll list and put its slot on the freelist */
static void remove_poll_user( struct fd *fd, int user )
{
    assert( user >= 0 );
    assert( poll_users[user] == fd );

    remove_epoll_user( fd, user );
    pollfd[user].fd = -1;
    pollfd[user].events = 0;
    pollfd[user].revents = 0;
    poll_users[user] = (struct fd *)freelist;
    freelist = &poll_users[user];
    active_users--;
}

846 847
/* process pending timeouts and return the time until the next timeout, in milliseconds */
static int get_next_timeout(void)
848
{
849
    if (!list_empty( &timeout_list ))
850
    {
851
        struct list expired_list, *ptr;
852

853
        /* first remove all expired timers from the list */
854

855 856 857 858
        list_init( &expired_list );
        while ((ptr = list_head( &timeout_list )) != NULL)
        {
            struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
859

860
            if (timeout->when <= current_time)
861
            {
862
                list_remove( &timeout->entry );
863
                list_add_tail( &expired_list, &timeout->entry );
864
            }
865 866
            else break;
        }
867

868
        /* now call the callback for all the removed timers */
869

870 871 872 873 874 875 876 877 878 879 880
        while ((ptr = list_head( &expired_list )) != NULL)
        {
            struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
            list_remove( &timeout->entry );
            timeout->callback( timeout->private );
            free( timeout );
        }

        if ((ptr = list_head( &timeout_list )) != NULL)
        {
            struct timeout_user *timeout = LIST_ENTRY( ptr, struct timeout_user, entry );
881
            int diff = (timeout->when - current_time + 9999) / 10000;
882 883
            if (diff < 0) diff = 0;
            return diff;
884
        }
885 886 887 888 889 890 891 892 893
    }
    return -1;  /* no pending timeouts */
}

/* server main poll() loop */
void main_loop(void)
{
    int i, ret, timeout;

894 895
    set_current_time();
    server_start_time = current_time;
896

897
    main_loop_epoll();
898 899
    /* fall through to normal poll loop */

900 901 902 903 904
    while (active_users)
    {
        timeout = get_next_timeout();

        if (!active_users) break;  /* last user removed by a timeout */
905

906
        ret = poll( pollfd, nb_users, timeout );
907
        set_current_time();
908

909 910 911 912 913 914 915 916 917 918 919 920 921 922
        if (ret > 0)
        {
            for (i = 0; i < nb_users; i++)
            {
                if (pollfd[i].revents)
                {
                    fd_poll_event( poll_users[i], pollfd[i].revents );
                    if (!--ret) break;
                }
            }
        }
    }
}

/****************************************************************/
/* device functions */

/* hash table of all device objects currently in use, keyed by dev_t;
 * buckets are initialized lazily in get_device() */
static struct list device_hash[DEVICE_HASH_SIZE];

/* check whether the device containing this fd is removable media */
/* (platform specific; returns 0 when there is no way to tell) */
static int is_device_removable( dev_t dev, int unix_fd )
{
#if defined(linux) && defined(HAVE_FSTATFS)
    struct statfs stfs;

    /* check for floppy disk */
    if (major(dev) == FLOPPY_MAJOR) return 1;

    /* otherwise detect removability from the filesystem type */
    if (fstatfs( unix_fd, &stfs ) == -1) return 0;
    return (stfs.f_type == 0x9660 ||    /* iso9660 */
            stfs.f_type == 0x9fa1 ||    /* supermount */
            stfs.f_type == 0x15013346); /* udf */
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
    struct statfs stfs;

    if (fstatfs( unix_fd, &stfs ) == -1) return 0;
    return (!strcmp("cd9660", stfs.f_fstypename) || !strcmp("udf", stfs.f_fstypename));
#elif defined(__NetBSD__)
    struct statvfs stfs;

    if (fstatvfs( unix_fd, &stfs ) == -1) return 0;
    return (!strcmp("cd9660", stfs.f_fstypename) || !strcmp("udf", stfs.f_fstypename));
#elif defined(sun)
# include <sys/dkio.h>
# include <sys/vtoc.h>
    /* on Solaris, ask the disk controller for its type */
    struct dk_cinfo dkinf;
    if (ioctl( unix_fd, DKIOCINFO, &dkinf ) == -1) return 0;
    return (dkinf.dki_ctype == DKC_CDROM ||
            dkinf.dki_ctype == DKC_NCRFLOPPY ||
            dkinf.dki_ctype == DKC_SMSFLOPPY ||
            dkinf.dki_ctype == DKC_INTEL82072 ||
            dkinf.dki_ctype == DKC_INTEL82077);
#else
    return 0;
#endif
}

/* retrieve the device object for a given fd, creating it if needed */
/* returns a new reference, or NULL if the device is unknown and unix_fd is -1 */
static struct device *get_device( dev_t dev, int unix_fd )
{
    struct device *device;
    unsigned int i, hash = dev % DEVICE_HASH_SIZE;

    /* a NULL next pointer means this hash bucket was never initialized */
    if (device_hash[hash].next)
    {
        LIST_FOR_EACH_ENTRY( device, &device_hash[hash], struct device, entry )
            if (device->dev == dev) return (struct device *)grab_object( device );
    }
    else list_init( &device_hash[hash] );

    /* not found, create it */

    if (unix_fd == -1) return NULL;  /* need a real fd to probe removability below */
    if ((device = alloc_object( &device_ops )))
    {
        device->dev = dev;
        device->removable = is_device_removable( dev, unix_fd );
        for (i = 0; i < INODE_HASH_SIZE; i++) list_init( &device->inode_hash[i] );
        list_add_head( &device_hash[hash], &device->entry );
    }
    return device;
}

/* dump a device object for debugging */
static void device_dump( struct object *obj, int verbose )
{
    struct device *dev_obj = (struct device *)obj;

    fprintf( stderr, "Device dev=" );
    DUMP_LONG_LONG( dev_obj->dev );
    fputc( '\n', stderr );
}
999

1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
static void device_destroy( struct object *obj )
{
    struct device *device = (struct device *)obj;
    unsigned int i;

    for (i = 0; i < INODE_HASH_SIZE; i++)
        assert( list_empty(&device->inode_hash[i]) );

    list_remove( &device->entry );  /* remove it from the hash table */
}


/****************************************************************/
/* inode functions */
1014

1015
/* close all pending file descriptors in the closed list */
1016
static void inode_close_pending( struct inode *inode, int keep_unlinks )
1017
{
1018 1019 1020
    struct list *ptr = list_head( &inode->closed );

    while (ptr)
1021
    {
1022 1023 1024
        struct closed_fd *fd = LIST_ENTRY( ptr, struct closed_fd, entry );
        struct list *next = list_next( &inode->closed, ptr );

1025
        if (fd->unix_fd != -1)
1026
        {
1027 1028
            close( fd->unix_fd );
            fd->unix_fd = -1;
1029
        }
1030
        if (!keep_unlinks || !fd->unlink)  /* get rid of it unless there's an unlink pending on that file */
1031 1032
        {
            list_remove( ptr );
1033
            free( fd->unix_name );
1034 1035 1036
            free( fd );
        }
        ptr = next;
1037 1038 1039
    }
}

1040 1041 1042
static void inode_dump( struct object *obj, int verbose )
{
    struct inode *inode = (struct inode *)obj;
1043
    fprintf( stderr, "Inode device=%p ino=", inode->device );
1044 1045 1046 1047 1048 1049 1050
    DUMP_LONG_LONG( inode->ino );
    fprintf( stderr, "\n" );
}

static void inode_destroy( struct object *obj )
{
    struct inode *inode = (struct inode *)obj;
1051
    struct list *ptr;
1052

1053 1054
    assert( list_empty(&inode->open) );
    assert( list_empty(&inode->locks) );
1055 1056

    list_remove( &inode->entry );
1057 1058 1059 1060 1061

    while ((ptr = list_head( &inode->closed )))
    {
        struct closed_fd *fd = LIST_ENTRY( ptr, struct closed_fd, entry );
        list_remove( ptr );
1062
        if (fd->unix_fd != -1) close( fd->unix_fd );
1063
        if (fd->unlink)
1064 1065 1066
        {
            /* make sure it is still the same file */
            struct stat st;
1067
            if (!stat( fd->unix_name, &st ) && st.st_dev == inode->device->dev && st.st_ino == inode->ino)
1068
            {
1069 1070
                if (S_ISDIR(st.st_mode)) rmdir( fd->unix_name );
                else unlink( fd->unix_name );
1071
            }
1072
        }
1073
        free( fd->unix_name );
1074 1075
        free( fd );
    }
1076
    release_object( inode->device );
1077 1078 1079
}

/* retrieve the inode object for a given fd, creating it if needed */
1080
static struct inode *get_inode( dev_t dev, ino_t ino, int unix_fd )
1081
{
1082
    struct device *device;
1083
    struct inode *inode;
1084
    unsigned int hash = ino % INODE_HASH_SIZE;
1085

1086
    if (!(device = get_device( dev, unix_fd ))) return NULL;
1087 1088

    LIST_FOR_EACH_ENTRY( inode, &device->inode_hash[hash], struct inode, entry )
1089
    {
1090
        if (inode->ino == ino)
1091
        {
1092 1093
            release_object( device );
            return (struct inode *)grab_object( inode );
1094 1095 1096 1097 1098 1099
        }
    }

    /* not found, create it */
    if ((inode = alloc_object( &inode_ops )))
    {
1100
        inode->device = device;
1101 1102
        inode->ino    = ino;
        list_init( &inode->open );
1103
        list_init( &inode->locks );
1104
        list_init( &inode->closed );
1105
        list_add_head( &device->inode_hash[hash], &inode->entry );
1106
    }
1107 1108
    else release_object( device );

1109 1110 1111
    return inode;
}

1112
/* add fd to the inode list of file descriptors to close */
1113 1114
static void inode_add_closed_fd( struct inode *inode, struct closed_fd *fd )
{
1115 1116
    if (!list_empty( &inode->locks ))
    {
1117
        list_add_head( &inode->closed, &fd->entry );
1118
    }
1119
    else if (fd->unlink)  /* close the fd but keep the structure around for unlink */
1120
    {
1121 1122
        if (fd->unix_fd != -1) close( fd->unix_fd );
        fd->unix_fd = -1;
1123 1124 1125
        list_add_head( &inode->closed, &fd->entry );
    }
    else  /* no locks on this inode and no unlink, get rid of the fd */
1126
    {
1127
        if (fd->unix_fd != -1) close( fd->unix_fd );
1128
        free( fd->unix_name );
1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147
        free( fd );
    }
}


/****************************************************************/
/* file lock functions */

/* dump a file lock object for debugging */
static void file_lock_dump( struct object *obj, int verbose )
{
    struct file_lock *lock = (struct file_lock *)obj;

    fprintf( stderr, "Lock %s fd=%p proc=%p start=",
             lock->shared ? "shared" : "excl", lock->fd, lock->process );
    DUMP_LONG_LONG( lock->start );
    fprintf( stderr, " end=" );
    DUMP_LONG_LONG( lock->end );
    fputc( '\n', stderr );
}

1148
static int file_lock_signaled( struct object *obj, struct wait_queue_entry *entry )
1149 1150 1151 1152 1153 1154 1155
{
    struct file_lock *lock = (struct file_lock *)obj;
    /* lock is signaled if it has lost its owner */
    return !lock->process;
}

/* set (or remove) a Unix lock if possible for the given range */
/* returns 1 on success (including "locking unsupported on this fs"), */
/* 0 on failure with the NT status set accordingly */
static int set_unix_lock( struct fd *fd, file_pos_t start, file_pos_t end, int type )
{
    struct flock fl;

    if (!fd->fs_locks) return 1;  /* no fs locks possible for this fd */
    for (;;)
    {
        if (start == end) return 1;  /* can't set zero-byte lock */
        if (start > max_unix_offset) return 1;  /* ignore it */
        fl.l_type   = type;
        fl.l_whence = SEEK_SET;
        fl.l_start  = start;
        /* an end of 0 (or past the max offset) means lock to end of file */
        if (!end || end > max_unix_offset) fl.l_len = 0;
        else fl.l_len = end - start;
        if (fcntl( fd->unix_fd, F_SETLK, &fl ) != -1) return 1;

        switch(errno)
        {
        case EACCES:
            /* check whether locks work at all on this file system */
            if (fcntl( fd->unix_fd, F_GETLK, &fl ) != -1)
            {
                set_error( STATUS_FILE_LOCK_CONFLICT );
                return 0;
            }
            /* fall through */
        case EIO:
        case ENOLCK:
        case ENOTSUP:
            /* no locking on this fs, just ignore it */
            fd->fs_locks = 0;
            return 1;
        case EAGAIN:
            /* another process holds a conflicting lock */
            set_error( STATUS_FILE_LOCK_CONFLICT );
            return 0;
        case EBADF:
            /* this can happen if we try to set a write lock on a read-only file */
            /* try to at least grab a read lock */
            if (fl.l_type == F_WRLCK)
            {
                type = F_RDLCK;
                break; /* retry */
            }
            set_error( STATUS_ACCESS_DENIED );
            return 0;
#ifdef EOVERFLOW
        case EOVERFLOW:
#endif
        case EINVAL:
            /* this can happen if off_t is 64-bit but the kernel only supports 32-bit */
            /* in that case we shrink the limit and retry */
            if (max_unix_offset > INT_MAX)
            {
                max_unix_offset = INT_MAX;
                break;  /* retry */
            }
            /* fall through */
        default:
            file_set_error();
            return 0;
        }
    }
}

/* check if interval [start;end) overlaps the lock */
1221
static inline int lock_overlaps( struct file_lock *lock, file_pos_t start, file_pos_t end )
1222 1223 1224 1225 1226 1227 1228
{
    if (lock->end && start >= lock->end) return 0;
    if (end && lock->start >= end) return 0;
    return 1;
}

/* remove Unix locks for all bytes in the specified area that are no longer locked */
/* works by computing the list of unlocked "holes" between the remaining server-side
 * locks, then issuing F_UNLCK for each hole */
static void remove_unix_locks( struct fd *fd, file_pos_t start, file_pos_t end )
{
    /* temporary doubly-linked list node describing an unlocked interval */
    struct hole
    {
        struct hole *next;
        struct hole *prev;
        file_pos_t   start;
        file_pos_t   end;
    } *first, *cur, *next, *buffer;

    struct list *ptr;
    int count = 0;

    if (!fd->inode) return;
    if (!fd->fs_locks) return;
    if (start == end || start > max_unix_offset) return;
    if (!end || end > max_unix_offset) end = max_unix_offset + 1;

    /* count the number of locks overlapping the specified area */

    LIST_FOR_EACH( ptr, &fd->inode->locks )
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, inode_entry );
        if (lock->start == lock->end) continue;  /* zero-byte locks never got a Unix lock */
        if (lock_overlaps( lock, start, end )) count++;
    }

    if (!count)  /* no locks at all, we can unlock everything */
    {
        set_unix_lock( fd, start, end, F_UNLCK );
        return;
    }

    /* allocate space for the list of holes */
    /* max. number of holes is number of locks + 1 */

    if (!(buffer = malloc( sizeof(*buffer) * (count+1) ))) return;
    first = buffer;
    first->next  = NULL;
    first->prev  = NULL;
    first->start = start;
    first->end   = end;
    next = first + 1;

    /* build a sorted list of unlocked holes in the specified area */

    LIST_FOR_EACH( ptr, &fd->inode->locks )
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, inode_entry );
        if (lock->start == lock->end) continue;
        if (!lock_overlaps( lock, start, end )) continue;

        /* go through all the holes touched by this lock */
        for (cur = first; cur; cur = cur->next)
        {
            if (cur->end <= lock->start) continue; /* hole is before start of lock */
            if (lock->end && cur->start >= lock->end) break;  /* hole is after end of lock */

            /* now we know that lock is overlapping hole */

            if (cur->start >= lock->start)  /* lock starts before hole, shrink from start */
            {
                cur->start = lock->end;
                if (cur->start && cur->start < cur->end) break;  /* done with this lock */
                /* now hole is empty, remove it */
                if (cur->next) cur->next->prev = cur->prev;
                if (cur->prev) cur->prev->next = cur->next;
                else if (!(first = cur->next)) goto done;  /* no more holes at all */
            }
            else if (!lock->end || cur->end <= lock->end)  /* lock larger than hole, shrink from end */
            {
                cur->end = lock->start;
                assert( cur->start < cur->end );
            }
            else  /* lock is in the middle of hole, split hole in two */
            {
                next->prev = cur;
                next->next = cur->next;
                cur->next = next;
                next->start = lock->end;
                next->end = cur->end;
                cur->end = lock->start;
                assert( next->start < next->end );
                assert( cur->end < next->start );
                next++;
                break;  /* done with this lock */
            }
        }
    }

    /* clear Unix locks for all the holes */

    for (cur = first; cur; cur = cur->next)
        set_unix_lock( fd, cur->start, cur->end, F_UNLCK );

 done:
    free( buffer );
}

/* create a new lock on a fd */
/* returns NULL on failure, with the NT status already set */
static struct file_lock *add_lock( struct fd *fd, int shared, file_pos_t start, file_pos_t end )
{
    struct file_lock *lock = alloc_object( &file_lock_ops );

    if (!lock) return NULL;
    lock->shared  = shared;
    lock->start   = start;
    lock->end     = end;
    lock->fd      = fd;
    lock->process = current->process;

    /* now try to set a Unix lock */
    if (!set_unix_lock( lock->fd, lock->start, lock->end, lock->shared ? F_RDLCK : F_WRLCK ))
    {
        release_object( lock );
        return NULL;
    }
    /* register the lock with the fd, the inode and the owning process */
    list_add_tail( &fd->locks, &lock->fd_entry );
    list_add_tail( &fd->inode->locks, &lock->inode_entry );
    list_add_tail( &lock->process->locks, &lock->proc_entry );
    return lock;
}

/* remove an existing lock */
static void remove_lock( struct file_lock *lock, int remove_unix )
{
    struct inode *inode = lock->fd->inode;

    list_remove( &lock->fd_entry );
    list_remove( &lock->inode_entry );
    list_remove( &lock->proc_entry );
    if (remove_unix) remove_unix_locks( lock->fd, lock->start, lock->end );
1361
    if (list_empty( &inode->locks )) inode_close_pending( inode, 1 );
1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
    lock->process = NULL;
    wake_up( &lock->obj, 0 );
    release_object( lock );
}

/* remove all locks owned by a given process */
void remove_process_locks( struct process *process )
{
    struct list *ptr;

    while ((ptr = list_head( &process->locks )))
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, proc_entry );
        remove_lock( lock, 1 );  /* this removes it from the list */
    }
}

/* remove all locks on a given fd */
static void remove_fd_locks( struct fd *fd )
{
    file_pos_t start = FILE_POS_T_MAX, end = 0;
    struct list *ptr;

    while ((ptr = list_head( &fd->locks )))
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, fd_entry );
        if (lock->start < start) start = lock->start;
        if (!lock->end || lock->end > end) end = lock->end - 1;
        remove_lock( lock, 0 );
    }
    if (start < end) remove_unix_locks( fd, start, end + 1 );
}

/* add a lock on an fd */
/* returns handle to wait on */
obj_handle_t lock_fd( struct fd *fd, file_pos_t start, file_pos_t count, int shared, int wait )
{
    struct list *ptr;
    file_pos_t end = start + count;

    if (!fd->inode)  /* not a regular file */
    {
        set_error( STATUS_INVALID_DEVICE_REQUEST );
        return 0;
    }

    /* don't allow wrapping locks */
    if (end && end < start)
    {
        set_error( STATUS_INVALID_PARAMETER );
        return 0;
    }

    /* check if another lock on that file overlaps the area */
    LIST_FOR_EACH( ptr, &fd->inode->locks )
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, inode_entry );
        if (!lock_overlaps( lock, start, end )) continue;
        /* a shared request coexists with shared locks and with any lock on the same fd */
        if (shared && (lock->shared || lock->fd == fd)) continue;
        /* found one */
        if (!wait)
        {
            set_error( STATUS_FILE_LOCK_CONFLICT );
            return 0;
        }
        set_error( STATUS_PENDING );
        /* give the client a handle on the conflicting lock to wait on */
        return alloc_handle( current->process, lock, SYNCHRONIZE, 0 );
    }

    /* not found, add it */
    if (add_lock( fd, shared, start, end )) return 0;
    if (get_error() == STATUS_FILE_LOCK_CONFLICT)
    {
        /* Unix lock conflict -> tell client to wait and retry */
        if (wait) set_error( STATUS_PENDING );
    }
    return 0;
}

/* remove a lock on an fd */
void unlock_fd( struct fd *fd, file_pos_t start, file_pos_t count )
{
    struct list *ptr;
    file_pos_t end = start + count;

    /* find an existing lock with the exact same parameters */
    LIST_FOR_EACH( ptr, &fd->locks )
    {
        struct file_lock *lock = LIST_ENTRY( ptr, struct file_lock, fd_entry );

        if (lock->start != start || lock->end != end) continue;
        remove_lock( lock, 1 );
        return;
    }
    /* no exact match: the range was never locked on this fd */
    set_error( STATUS_FILE_LOCK_CONFLICT );
}


1461 1462 1463
/****************************************************************/
/* file descriptor functions */

1464 1465 1466
static void fd_dump( struct object *obj, int verbose )
{
    struct fd *fd = (struct fd *)obj;
1467
    fprintf( stderr, "Fd unix_fd=%d user=%p options=%08x", fd->unix_fd, fd->user, fd->options );
1468
    if (fd->inode) fprintf( stderr, " inode=%p unlink=%d", fd->inode, fd->closed->unlink );
1469
    fprintf( stderr, "\n" );
1470 1471 1472 1473 1474
}

/* destroy an fd object: cancel async I/O, drop locks, and release the unix fd */
static void fd_destroy( struct object *obj )
{
    struct fd *fd = (struct fd *)obj;

    free_async_queue( fd->read_q );
    free_async_queue( fd->write_q );
    free_async_queue( fd->wait_q );

    if (fd->completion) release_object( fd->completion );
    remove_fd_locks( fd );
    list_remove( &fd->inode_entry );
    if (fd->poll_index != -1) remove_poll_user( fd, fd->poll_index );
    if (fd->inode)
    {
        /* hand the unix fd (and unix_name ownership, via fd->closed) to the
         * inode so it can defer the close / perform a pending unlink */
        inode_add_closed_fd( fd->inode, fd->closed );
        release_object( fd->inode );
    }
    else  /* no inode, close it right away */
    {
        if (fd->unix_fd != -1) close( fd->unix_fd );
        free( fd->unix_name );
    }
}

/* check if the desired access is possible without violating */
/* the sharing mode of other opens of the same file */
/* returns 0 on success, or the NT status of the violation; */
/* also records access/sharing on the fd as a side effect */
static unsigned int check_sharing( struct fd *fd, unsigned int access, unsigned int sharing,
                                   unsigned int open_flags, unsigned int options )
{
    /* only a few access bits are meaningful wrt sharing */
    const unsigned int read_access = FILE_READ_DATA | FILE_EXECUTE;
    const unsigned int write_access = FILE_WRITE_DATA | FILE_APPEND_DATA;
    const unsigned int all_access = read_access | write_access | DELETE;

    unsigned int existing_sharing = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
    unsigned int existing_access = 0;
    struct list *ptr;

    fd->access = access;
    fd->sharing = sharing;

    /* accumulate the combined sharing and access of all other opens of this inode */
    LIST_FOR_EACH( ptr, &fd->inode->open )
    {
        struct fd *fd_ptr = LIST_ENTRY( ptr, struct fd, inode_entry );
        if (fd_ptr != fd)
        {
            /* if access mode is 0, sharing mode is ignored */
            if (fd_ptr->access & all_access) existing_sharing &= fd_ptr->sharing;
            existing_access |= fd_ptr->access;
        }
    }

    /* our requested access must be allowed by every existing open's sharing mode */
    if (((access & read_access) && !(existing_sharing & FILE_SHARE_READ)) ||
        ((access & write_access) && !(existing_sharing & FILE_SHARE_WRITE)) ||
        ((access & DELETE) && !(existing_sharing & FILE_SHARE_DELETE)))
        return STATUS_SHARING_VIOLATION;
    /* existing memory mappings impose extra restrictions */
    if (((existing_access & FILE_MAPPING_WRITE) && !(sharing & FILE_SHARE_WRITE)) ||
        ((existing_access & FILE_MAPPING_IMAGE) && (access & FILE_WRITE_DATA)))
        return STATUS_SHARING_VIOLATION;
    if ((existing_access & FILE_MAPPING_IMAGE) && (options & FILE_DELETE_ON_CLOSE))
        return STATUS_CANNOT_DELETE;
    if ((existing_access & FILE_MAPPING_ACCESS) && (open_flags & O_TRUNC))
        return STATUS_USER_MAPPED_FILE;
    if (!(access & all_access))
        return 0;  /* if access mode is 0, sharing mode is ignored (except for mappings) */
    /* and our sharing mode must allow every existing access */
    if (((existing_access & read_access) && !(sharing & FILE_SHARE_READ)) ||
        ((existing_access & write_access) && !(sharing & FILE_SHARE_WRITE)) ||
        ((existing_access & DELETE) && !(sharing & FILE_SHARE_DELETE)))
        return STATUS_SHARING_VIOLATION;
    return 0;
}

1544 1545 1546 1547 1548
/* set the events that select waits for on this fd */
void set_fd_events( struct fd *fd, int events )
{
    int user = fd->poll_index;
    assert( poll_users[user] == fd );
1549 1550 1551

    set_fd_epoll_events( fd, user, events );

1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564
    if (events == -1)  /* stop waiting on this fd completely */
    {
        pollfd[user].fd = -1;
        pollfd[user].events = POLLERR;
        pollfd[user].revents = 0;
    }
    else if (pollfd[user].fd != -1 || !pollfd[user].events)
    {
        pollfd[user].fd = fd->unix_fd;
        pollfd[user].events = events;
    }
}

/* prepare an fd for unmounting its corresponding device */
static inline void unmount_fd( struct fd *fd )
{
    assert( fd->inode );

    /* fail all pending async I/O with a dismount status */
    async_wake_up( fd->read_q, STATUS_VOLUME_DISMOUNTED );
    async_wake_up( fd->write_q, STATUS_VOLUME_DISMOUNTED );

    if (fd->poll_index != -1) set_fd_events( fd, -1 );

    if (fd->unix_fd != -1) close( fd->unix_fd );

    fd->unix_fd = -1;
    /* subsequent operations on this fd will fail with this status */
    fd->no_fd_status = STATUS_VOLUME_DISMOUNTED;
    fd->closed->unix_fd = -1;
    fd->closed->unlink = 0;  /* don't try to unlink files on the unmounted device */

    /* stop using Unix locks on this fd (existing locks have been removed by close) */
    fd->fs_locks = 0;
}

1586
/* allocate an fd object, without setting the unix fd yet */
1587
static struct fd *alloc_fd_object(void)
1588
{
1589
    struct fd *fd = alloc_object( &fd_ops );
1590

1591 1592
    if (!fd) return NULL;

1593 1594
    fd->fd_ops     = NULL;
    fd->user       = NULL;
1595 1596
    fd->inode      = NULL;
    fd->closed     = NULL;
1597
    fd->access     = 0;
1598
    fd->options    = 0;
1599
    fd->sharing    = 0;
1600
    fd->unix_fd    = -1;
1601
    fd->unix_name  = NULL;
1602
    fd->cacheable  = 0;
1603
    fd->signaled   = 1;
1604
    fd->fs_locks   = 1;
1605
    fd->poll_index = -1;
1606 1607 1608
    fd->read_q     = NULL;
    fd->write_q    = NULL;
    fd->wait_q     = NULL;
1609
    fd->completion = NULL;
1610
    list_init( &fd->inode_entry );
1611
    list_init( &fd->locks );
1612 1613 1614 1615 1616 1617 1618 1619 1620

    if ((fd->poll_index = add_poll_user( fd )) == -1)
    {
        release_object( fd );
        return NULL;
    }
    return fd;
}

/* allocate a pseudo fd object, for objects that need to behave like files but don't have a unix fd */
struct fd *alloc_pseudo_fd( const struct fd_ops *fd_user_ops, struct object *user, unsigned int options )
{
    struct fd *fd = alloc_object( &fd_ops );

    if (!fd) return NULL;

    fd->fd_ops     = fd_user_ops;
    fd->user       = user;
    fd->inode      = NULL;
    fd->closed     = NULL;
    fd->access     = 0;
    fd->options    = options;
    fd->sharing    = 0;
    fd->unix_name  = NULL;
    fd->unix_fd    = -1;   /* pseudo fds never carry a real unix fd */
    fd->cacheable  = 0;
    fd->signaled   = 0;
    fd->fs_locks   = 0;    /* no unix fd, so no unix locking either */
    fd->poll_index = -1;   /* not registered with the poll loop */
    fd->read_q     = NULL;
    fd->write_q    = NULL;
    fd->wait_q     = NULL;
    fd->completion = NULL;
    fd->no_fd_status = STATUS_BAD_DEVICE_TYPE;
    list_init( &fd->inode_entry );
    list_init( &fd->locks );
    return fd;
}

/* duplicate an fd object for a different user */
/* returns a new fd with its own unix fd dup'ed from the original, */
/* or NULL on failure with the error set */
struct fd *dup_fd_object( struct fd *orig, unsigned int access, unsigned int sharing, unsigned int options )
{
    unsigned int err;
    struct fd *fd = alloc_fd_object();

    if (!fd) return NULL;

    fd->options    = options;
    fd->cacheable  = orig->cacheable;

    if (orig->unix_name)
    {
        if (!(fd->unix_name = mem_alloc( strlen(orig->unix_name) + 1 ))) goto failed;
        strcpy( fd->unix_name, orig->unix_name );
    }

    if (orig->inode)
    {
        /* file on disk: dup the unix fd, share the inode, and verify sharing rules */
        struct closed_fd *closed = mem_alloc( sizeof(*closed) );
        if (!closed) goto failed;
        if ((fd->unix_fd = dup( orig->unix_fd )) == -1)
        {
            file_set_error();
            free( closed );
            goto failed;
        }
        closed->unix_fd = fd->unix_fd;
        closed->unlink = 0;
        closed->unix_name = fd->unix_name;  /* the closed_fd structure shares the name */
        fd->closed = closed;
        fd->inode = (struct inode *)grab_object( orig->inode );
        list_add_head( &fd->inode->open, &fd->inode_entry );
        if ((err = check_sharing( fd, access, sharing, 0, options )))
        {
            set_error( err );
            goto failed;
        }
    }
    else if ((fd->unix_fd = dup( orig->unix_fd )) == -1)
    {
        file_set_error();
        goto failed;
    }
    return fd;

failed:
    /* releasing the fd object cleans up everything allocated so far */
    release_object( fd );
    return NULL;
}

1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715
/* find an existing fd object that can be reused for a mapping */
struct fd *get_fd_object_for_mapping( struct fd *fd, unsigned int access, unsigned int sharing )
{
    struct fd *fd_ptr;

    if (!fd->inode) return NULL;

    LIST_FOR_EACH_ENTRY( fd_ptr, &fd->inode->open, struct fd, inode_entry )
        if (fd_ptr->access == access && fd_ptr->sharing == sharing)
            return (struct fd *)grab_object( fd_ptr );

    return NULL;
}

/* set the status to return when the fd has no associated unix fd */
/* (used by pseudo fds and unmounted devices) */
void set_no_fd_status( struct fd *fd, unsigned int status )
{
    fd->no_fd_status = status;
}

/* sets the user of an fd that previously had no user */
void set_fd_user( struct fd *fd, const struct fd_ops *user_ops, struct object *user )
{
    assert( fd->fd_ops == NULL );  /* an fd may only be given a user once */
    fd->fd_ops = user_ops;
    fd->user   = user;
}

1730
char *dup_fd_name( struct fd *root, const char *name )
1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748
{
    char *ret;

    if (!root) return strdup( name );
    if (!root->unix_name) return NULL;

    /* skip . prefix */
    if (name[0] == '.' && (!name[1] || name[1] == '/')) name++;

    if ((ret = malloc( strlen(root->unix_name) + strlen(name) + 2 )))
    {
        strcpy( ret, root->unix_name );
        if (name[0] && name[0] != '/') strcat( ret, "/" );
        strcat( ret, name );
    }
    return ret;
}

1749
/* open() wrapper that returns a struct fd with no fd user set */
1750
struct fd *open_fd( struct fd *root, const char *name, int flags, mode_t *mode, unsigned int access,
1751
                    unsigned int sharing, unsigned int options )
1752 1753 1754
{
    struct stat st;
    struct closed_fd *closed_fd;
1755
    struct fd *fd;
1756
    int root_fd = -1;
1757
    int rw_mode;
1758

1759 1760
    if (((options & FILE_DELETE_ON_CLOSE) && !(access & DELETE)) ||
        ((options & FILE_DIRECTORY_FILE) && (flags & O_TRUNC)))
1761 1762 1763 1764 1765
    {
        set_error( STATUS_INVALID_PARAMETER );
        return NULL;
    }

1766
    if (!(fd = alloc_fd_object())) return NULL;
1767

1768
    fd->options = options;
1769
    if (!(closed_fd = mem_alloc( sizeof(*closed_fd) )))
1770 1771 1772 1773
    {
        release_object( fd );
        return NULL;
    }
1774

1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785
    if (root)
    {
        if ((root_fd = get_unix_fd( root )) == -1) goto error;
        if (fchdir( root_fd ) == -1)
        {
            file_set_error();
            root_fd = -1;
            goto error;
        }
    }

1786 1787 1788
    /* create the directory if needed */
    if ((options & FILE_DIRECTORY_FILE) && (flags & O_CREAT))
    {
1789
        if (mkdir( name, *mode ) == -1)
1790 1791 1792 1793
        {
            if (errno != EEXIST || (flags & O_EXCL))
            {
                file_set_error();
1794
                goto error;
1795 1796 1797 1798
            }
        }
        flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
    }
1799 1800 1801 1802 1803 1804 1805 1806

    if ((access & FILE_UNIX_WRITE_ACCESS) && !(options & FILE_DIRECTORY_FILE))
    {
        if (access & FILE_UNIX_READ_ACCESS) rw_mode = O_RDWR;
        else rw_mode = O_WRONLY;
    }
    else rw_mode = O_RDONLY;

1807
    fd->unix_name = dup_fd_name( root, name );
1808

1809
    if ((fd->unix_fd = open( name, rw_mode | (flags & ~O_TRUNC), *mode )) == -1)
1810
    {
1811
        /* if we tried to open a directory for write access, retry read-only */
1812 1813 1814 1815 1816 1817 1818
        if (errno == EISDIR)
        {
            if ((access & FILE_UNIX_WRITE_ACCESS) || (flags & O_CREAT))
                fd->unix_fd = open( name, O_RDONLY | (flags & ~(O_TRUNC | O_CREAT | O_EXCL)), *mode );
        }

        if (fd->unix_fd == -1)
1819 1820 1821 1822
        {
            file_set_error();
            goto error;
        }
1823
    }
1824

1825
    closed_fd->unix_fd = fd->unix_fd;
1826 1827
    closed_fd->unlink = 0;
    closed_fd->unix_name = fd->unix_name;
1828 1829 1830
    fstat( fd->unix_fd, &st );
    *mode = st.st_mode;

1831 1832
    /* only bother with an inode for normal files and directories */
    if (S_ISREG(st.st_mode) || S_ISDIR(st.st_mode))
1833
    {
1834
        unsigned int err;
1835
        struct inode *inode = get_inode( st.st_dev, st.st_ino, fd->unix_fd );
1836 1837 1838 1839 1840 1841

        if (!inode)
        {
            /* we can close the fd because there are no others open on the same file,
             * otherwise we wouldn't have failed to allocate a new inode
             */
1842
            goto error;
1843 1844 1845
        }
        fd->inode = inode;
        fd->closed = closed_fd;
1846
        fd->cacheable = !inode->device->removable;
1847
        list_add_head( &inode->open, &fd->inode_entry );
1848
        closed_fd = NULL;
1849 1850 1851 1852 1853

        /* check directory options */
        if ((options & FILE_DIRECTORY_FILE) && !S_ISDIR(st.st_mode))
        {
            set_error( STATUS_NOT_A_DIRECTORY );
1854
            goto error;
1855 1856 1857 1858
        }
        if ((options & FILE_NON_DIRECTORY_FILE) && S_ISDIR(st.st_mode))
        {
            set_error( STATUS_FILE_IS_A_DIRECTORY );
1859
            goto error;
1860
        }
1861
        if ((err = check_sharing( fd, access, sharing, flags, options )))
1862
        {
1863
            set_error( err );
1864
            goto error;
1865
        }
1866 1867 1868 1869 1870 1871 1872 1873 1874

        /* can't unlink files if we don't have permission to access */
        if ((options & FILE_DELETE_ON_CLOSE) && !(flags & O_CREAT) &&
            !(st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
        {
            set_error( STATUS_CANNOT_DELETE );
            goto error;
        }

1875
        fd->closed->unlink = (options & FILE_DELETE_ON_CLOSE) != 0;
1876 1877 1878 1879 1880
        if (flags & O_TRUNC)
        {
            if (S_ISDIR(st.st_mode))
            {
                set_error( STATUS_OBJECT_NAME_COLLISION );
1881
                goto error;
1882 1883 1884
            }
            ftruncate( fd->unix_fd, 0 );
        }
1885
    }
1886
    else  /* special file */
1887
    {
1888
        if (options & FILE_DELETE_ON_CLOSE)  /* we can't unlink special files */
1889 1890
        {
            set_error( STATUS_INVALID_PARAMETER );
1891
            goto error;
1892
        }
1893
        free( closed_fd );
1894
        fd->cacheable = 1;
1895
    }
1896
    if (root_fd != -1) fchdir( server_dir_fd ); /* go back to the server dir */
1897
    return fd;
1898 1899 1900 1901

error:
    release_object( fd );
    free( closed_fd );
1902
    if (root_fd != -1) fchdir( server_dir_fd ); /* go back to the server dir */
1903
    return NULL;
1904 1905
}

1906 1907
/* create an fd for an anonymous file */
/* if the function fails the unix fd is closed */
1908 1909
struct fd *create_anonymous_fd( const struct fd_ops *fd_user_ops, int unix_fd, struct object *user,
                                unsigned int options )
1910
{
1911
    struct fd *fd = alloc_fd_object();
1912 1913 1914

    if (fd)
    {
1915
        set_fd_user( fd, fd_user_ops, user );
1916
        fd->unix_fd = unix_fd;
1917
        fd->options = options;
1918 1919 1920 1921 1922 1923
        return fd;
    }
    close( unix_fd );
    return NULL;
}

1924 1925
/* retrieve the object that is using an fd */
/* note: no reference is grabbed; the caller must not release the result */
void *get_fd_user( struct fd *fd )
{
    return fd->user;
}
1929

1930 1931 1932 1933 1934 1935
/* retrieve the opening options for the fd (the NT FILE_* options given at open time) */
unsigned int get_fd_options( struct fd *fd )
{
    return fd->options;
}

1936 1937 1938
/* retrieve the unix fd for an object */
int get_unix_fd( struct fd *fd )
{
1939
    if (fd->unix_fd == -1) set_error( fd->no_fd_status );
1940
    return fd->unix_fd;
1941 1942
}

1943 1944 1945 1946 1947 1948
/* check if two file descriptors point to the same file */
int is_same_file_fd( struct fd *fd1, struct fd *fd2 )
{
    /* inode objects are shared between fds on the same file, so pointer
     * comparison is enough; two fds with no inode (both NULL) compare equal */
    return fd1->inode == fd2->inode;
}

1949 1950 1951 1952 1953 1954
/* allow the fd to be cached (can't be reset once set) */
void allow_fd_caching( struct fd *fd )
{
    fd->cacheable = 1;
}

1955 1956 1957 1958 1959 1960
/* check if fd is on a removable device */
int is_fd_removable( struct fd *fd )
{
    /* fds without an inode (special files) are never considered removable */
    return (fd->inode && fd->inode->device->removable);
}

1961 1962 1963 1964 1965 1966 1967
/* set or clear the fd signaled state */
void set_fd_signaled( struct fd *fd, int signaled )
{
    fd->signaled = signaled;
    /* wake any threads waiting on the object that owns this fd */
    if (signaled) wake_up( fd->user, 0 );
}

1968
/* check if fd is signaled */
int is_fd_signaled( struct fd *fd )
{
    return fd->signaled;
}

1974 1975 1976 1977 1978 1979
/* handler for close_handle that refuses to close fd-associated handles in other processes */
int fd_close_handle( struct object *obj, struct process *process, obj_handle_t handle )
{
    if (!current) return 1;  /* no current thread: always allow */
    return current->process == process;
}

1980 1981 1982 1983 1984
/* check if events are pending and if yes return which one(s) */
int check_fd_events( struct fd *fd, int events )
{
    struct pollfd pfd;

1985
    if (fd->unix_fd == -1) return POLLERR;
1986
    if (fd->inode) return events;  /* regular files are always signaled */
1987

1988 1989 1990 1991
    pfd.fd     = fd->unix_fd;
    pfd.events = events;
    if (poll( &pfd, 1, 0 ) <= 0) return 0;
    return pfd.revents;
1992 1993 1994
}

/* default signaled() routine for objects that poll() on an fd */
int default_fd_signaled( struct object *obj, struct wait_queue_entry *entry )
{
    struct fd *fd = get_obj_fd( obj );  /* grabs a reference to the fd */
    int ret = fd->signaled;
    release_object( fd );
    return ret;
}

2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
/* default map_access() routine for objects that behave like an fd */
/* expands each generic access right into the file-specific rights, then strips
 * the generic bits from the result */
unsigned int default_fd_map_access( struct object *obj, unsigned int access )
{
    static const struct
    {
        unsigned int generic;
        unsigned int specific;
    } mapping[] =
    {
        { GENERIC_READ,    FILE_GENERIC_READ },
        { GENERIC_WRITE,   FILE_GENERIC_WRITE },
        { GENERIC_EXECUTE, FILE_GENERIC_EXECUTE },
        { GENERIC_ALL,     FILE_ALL_ACCESS },
    };
    unsigned int i;

    for (i = 0; i < sizeof(mapping) / sizeof(mapping[0]); i++)
        if (access & mapping[i].generic) access |= mapping[i].specific;

    return access & ~(GENERIC_READ | GENERIC_WRITE | GENERIC_EXECUTE | GENERIC_ALL);
}

2013 2014 2015 2016
/* default get_poll_events() routine: poll for read/write when asyncs are queued */
int default_fd_get_poll_events( struct fd *fd )
{
    int ev = async_waiting( fd->read_q ) ? POLLIN : 0;

    if (async_waiting( fd->write_q )) ev |= POLLOUT;
    return ev;
}

2022 2023 2024
/* default handler for poll() events */
void default_poll_event( struct fd *fd, int event )
{
    /* errors and hangups wake both queues, not just the matching direction */
    if (event & (POLLIN | POLLERR | POLLHUP)) async_wake_up( fd->read_q, STATUS_ALERTED );
    if (event & (POLLOUT | POLLERR | POLLHUP)) async_wake_up( fd->write_q, STATUS_ALERTED );

    /* if an error occurred, stop polling this fd to avoid busy-looping */
    if (event & (POLLERR | POLLHUP)) set_fd_events( fd, -1 );
    else if (!fd->inode) set_fd_events( fd, fd->fd_ops->get_poll_events( fd ) );
}

2033
/* queue an async of the given type on the fd, creating the queue on demand;
 * returns the new async (caller owns a reference) or NULL on failure */
struct async *fd_queue_async( struct fd *fd, const async_data_t *data, int type )
{
    struct async_queue *queue;
    struct async *async;

    /* pick (and lazily create) the queue matching the async type */
    switch (type)
    {
    case ASYNC_TYPE_READ:
        if (!fd->read_q && !(fd->read_q = create_async_queue( fd ))) return NULL;
        queue = fd->read_q;
        break;
    case ASYNC_TYPE_WRITE:
        if (!fd->write_q && !(fd->write_q = create_async_queue( fd ))) return NULL;
        queue = fd->write_q;
        break;
    case ASYNC_TYPE_WAIT:
        if (!fd->wait_q && !(fd->wait_q = create_async_queue( fd ))) return NULL;
        queue = fd->wait_q;
        break;
    default:
        queue = NULL;
        assert(0);
    }

    /* read/write asyncs need the fd polled (or woken immediately for files) */
    if ((async = create_async( current, queue, data )) && type != ASYNC_TYPE_WAIT)
    {
        if (!fd->inode)
            set_fd_events( fd, fd->fd_ops->get_poll_events( fd ) );
        else  /* regular files are always ready for read and write */
            async_wake_up( queue, STATUS_ALERTED );
    }
    return async;
}

2067
void fd_async_wake_up( struct fd *fd, int type, unsigned int status )
2068 2069 2070 2071
{
    switch (type)
    {
    case ASYNC_TYPE_READ:
2072
        async_wake_up( fd->read_q, status );
2073 2074
        break;
    case ASYNC_TYPE_WRITE:
2075
        async_wake_up( fd->write_q, status );
2076 2077
        break;
    case ASYNC_TYPE_WAIT:
2078
        async_wake_up( fd->wait_q, status );
2079 2080 2081 2082 2083 2084
        break;
    default:
        assert(0);
    }
}

2085 2086 2087 2088 2089
/* re-evaluate the polling state for the given queue via the fd user's routine */
void fd_reselect_async( struct fd *fd, struct async_queue *queue )
{
    fd->fd_ops->reselect_async( fd, queue );
}

2090 2091 2092 2093 2094
/* queue_async() routine for objects that don't support async I/O */
void no_fd_queue_async( struct fd *fd, const async_data_t *data, int type, int count )
{
    set_error( STATUS_OBJECT_TYPE_MISMATCH );
}

2095
void default_fd_queue_async( struct fd *fd, const async_data_t *data, int type, int count )
2096
{
2097
    struct async *async;
2098

2099
    if ((async = fd_queue_async( fd, data, type )))
2100 2101 2102 2103
    {
        release_object( async );
        set_error( STATUS_PENDING );
    }
2104 2105
}

2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118
/* default reselect_async() fd routine */
void default_fd_reselect_async( struct fd *fd, struct async_queue *queue )
{
    /* the wait queue does not drive polling, so there is nothing to re-arm for it */
    if (queue != fd->wait_q)
    {
        int poll_events = fd->fd_ops->get_poll_events( fd );
        int events = check_fd_events( fd, poll_events );
        /* dispatch events that are already pending, otherwise re-arm the poll */
        if (events) fd->fd_ops->poll_event( fd, events );
        else set_fd_events( fd, poll_events );
    }
}

/* default cancel_async() fd routine */
2119
int default_fd_cancel_async( struct fd *fd, struct process *process, struct thread *thread, client_ptr_t iosb )
2120
{
2121 2122 2123 2124 2125
    int n = 0;

    n += async_wake_up_by( fd->read_q, process, thread, iosb, STATUS_CANCELLED );
    n += async_wake_up_by( fd->write_q, process, thread, iosb, STATUS_CANCELLED );
    n += async_wake_up_by( fd->wait_q, process, thread, iosb, STATUS_CANCELLED );
2126
    return n;
2127 2128
}

2129 2130 2131 2132 2133 2134 2135 2136 2137 2138
/* check whether the stat buffer describes a device node that can be unmounted */
static inline int is_valid_mounted_device( struct stat *st )
{
#if defined(linux) || defined(__sun__)
    return S_ISBLK( st->st_mode );
#else
    /* disks are char devices on *BSD */
    return S_ISCHR( st->st_mode );
#endif
}

2139
/* close all Unix file descriptors on a device to allow unmounting it */
static void unmount_device( struct fd *device_fd )
{
    unsigned int i;
    struct stat st;
    struct device *device;
    struct inode *inode;
    struct fd *fd;
    int unix_fd = get_unix_fd( device_fd );

    if (unix_fd == -1) return;  /* get_unix_fd() already set the error */

    if (fstat( unix_fd, &st ) == -1 || !is_valid_mounted_device( &st ))
    {
        set_error( STATUS_INVALID_PARAMETER );
        return;
    }

    /* NOTE(review): the -1 fd argument appears to mean "don't create the device
     * object if it doesn't already exist" - confirm against get_device() */
    if (!(device = get_device( st.st_rdev, -1 ))) return;

    /* close the unix fd of every fd open on every inode of this device */
    for (i = 0; i < INODE_HASH_SIZE; i++)
    {
        LIST_FOR_EACH_ENTRY( inode, &device->inode_hash[i], struct inode, entry )
        {
            LIST_FOR_EACH_ENTRY( fd, &inode->open, struct fd, inode_entry )
            {
                unmount_fd( fd );
            }
            inode_close_pending( inode, 0 );
        }
    }
    /* remove it from the hash table */
    list_remove( &device->entry );
    list_init( &device->entry );
    release_object( device );
}

2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191
/* default read() routine, for objects that don't support reading */
obj_handle_t no_fd_read( struct fd *fd, const async_data_t *async, int blocking, file_pos_t pos )
{
    set_error( STATUS_OBJECT_TYPE_MISMATCH );
    return 0;
}

/* default write() routine, for objects that don't support writing */
obj_handle_t no_fd_write( struct fd *fd, const async_data_t *async, int blocking,
                          file_pos_t pos, data_size_t *written )
{
    set_error( STATUS_OBJECT_TYPE_MISMATCH );
    return 0;
}

/* default flush() routine, for objects that don't support flushing */
obj_handle_t no_fd_flush( struct fd *fd, const async_data_t *async, int blocking )
{
    set_error( STATUS_OBJECT_TYPE_MISMATCH );
    return 0;
}

/* default ioctl() routine, for objects that don't support ioctls */
obj_handle_t no_fd_ioctl( struct fd *fd, ioctl_code_t code, const async_data_t *async, int blocking )
{
    set_error( STATUS_OBJECT_TYPE_MISMATCH );
    return 0;
}

2205
/* default ioctl() routine */
2206
obj_handle_t default_fd_ioctl( struct fd *fd, ioctl_code_t code, const async_data_t *async, int blocking )
2207 2208 2209 2210 2211
{
    switch(code)
    {
    case FSCTL_DISMOUNT_VOLUME:
        unmount_device( fd );
2212
        return 0;
2213 2214
    default:
        set_error( STATUS_NOT_SUPPORTED );
2215
        return 0;
2216 2217 2218
    }
}

2219 2220 2221 2222 2223 2224 2225
/* same as get_handle_obj but retrieve the struct fd associated to the object */
/* returns NULL on failure; on success the caller owns the returned reference */
static struct fd *get_handle_fd_obj( struct process *process, obj_handle_t handle,
                                     unsigned int access )
{
    struct fd *fd = NULL;
    struct object *obj;

    if ((obj = get_handle_obj( process, handle, access, NULL )))
    {
        fd = get_obj_fd( obj );  /* may fail (NULL) with the error already set */
        release_object( obj );
    }
    return fd;
}

2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263
/* set disposition for the fd: whether the file is unlinked when the last fd closes */
static void set_fd_disposition( struct fd *fd, int unlink )
{
    struct stat st;

    /* only fds backed by an inode can be unlinked */
    if (!fd->inode)
    {
        set_error( STATUS_OBJECT_TYPE_MISMATCH );
        return;
    }

    if (fd->unix_fd == -1)
    {
        set_error( fd->no_fd_status );
        return;
    }

    if (fstat( fd->unix_fd, &st ) == -1)
    {
        file_set_error();
        return;
    }

    /* can't unlink special files */
    if (unlink && !S_ISDIR(st.st_mode) && !S_ISREG(st.st_mode))
    {
        set_error( STATUS_INVALID_PARAMETER );
        return;
    }

    /* can't unlink files we don't have permission to access */
    if (unlink && !(st.st_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
    {
        set_error( STATUS_CANNOT_DELETE );
        return;
    }

    /* delete-on-close set at open time always sticks */
    fd->closed->unlink = unlink || (fd->options & FILE_DELETE_ON_CLOSE);
}

2274
/* set new name for the fd: rename (or hard-link when create_link is set) the
 * underlying unix file; nameptr/len is the target name, not NUL-terminated */
static void set_fd_name( struct fd *fd, struct fd *root, const char *nameptr,
                         data_size_t len, int create_link )
{
    struct inode *inode;
    struct stat st;
    char *name;

    if (!fd->inode || !fd->unix_name)
    {
        set_error( STATUS_OBJECT_TYPE_MISMATCH );
        return;
    }
    /* the name must be absolute exactly when there is no root directory */
    if (!len || ((nameptr[0] == '/') ^ !root))
    {
        set_error( STATUS_OBJECT_PATH_SYNTAX_BAD );
        return;
    }
    /* make a NUL-terminated copy of the target name */
    if (!(name = mem_alloc( len + 1 ))) return;
    memcpy( name, nameptr, len );
    name[len] = 0;

    if (root)
    {
        char *combined_name = dup_fd_name( root, name );
        if (!combined_name)
        {
            set_error( STATUS_NO_MEMORY );
            goto failed;
        }
        free( name );
        name = combined_name;
    }

    /* when creating a hard link, source cannot be a dir */
    if (create_link && fd->unix_fd != -1 &&
        !fstat( fd->unix_fd, &st ) && S_ISDIR( st.st_mode ))
    {
        set_error( STATUS_FILE_IS_A_DIRECTORY );
        goto failed;
    }

    /* handle an existing file at the target path */
    if (!stat( name, &st ))
    {
        /* can't replace directories or special files */
        if (!S_ISREG( st.st_mode ))
        {
            set_error( STATUS_ACCESS_DENIED );
            goto failed;
        }

        /* can't replace an opened file */
        if ((inode = get_inode( st.st_dev, st.st_ino, -1 )))
        {
            int is_empty = list_empty( &inode->open );
            release_object( inode );
            if (!is_empty)
            {
                set_error( STATUS_ACCESS_DENIED );
                goto failed;
            }
        }

        /* link() expects that the target doesn't exist */
        /* rename() cannot replace files with directories */
        if (create_link || (fd->unix_fd != -1 &&
            !fstat( fd->unix_fd, &st ) && S_ISDIR( st.st_mode )))
        {
            if (unlink( name ))
            {
                file_set_error();
                goto failed;
            }
        }
    }

    if (create_link)
    {
        /* hard link: the fd keeps its original name */
        if (link( fd->unix_name, name ))
            file_set_error();
        free( name );
        return;
    }

    if (rename( fd->unix_name, name ))
    {
        file_set_error();
        goto failed;
    }

    /* the fd (and its deferred-close record) now own the new name */
    free( fd->unix_name );
    fd->unix_name = name;
    fd->closed->unix_name = name;
    return;

failed:
    free( name );
}

2373
/* return the completion port attached to the fd (grabbed) and its key, or NULL */
struct completion *fd_get_completion( struct fd *fd, apc_param_t *p_key )
{
    *p_key = fd->comp_key;
    if (!fd->completion) return NULL;
    return (struct completion *)grab_object( fd->completion );
}

/* copy the completion port association (and key) from one fd to another */
void fd_copy_completion( struct fd *src, struct fd *dst )
{
    /* the destination must not already have a completion port attached */
    assert( !dst->completion );
    dst->completion = fd_get_completion( src, &dst->comp_key );
}

2385
/* flush a file buffers */
DECL_HANDLER(flush)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->async.handle, 0 );

    if (fd)
    {
        /* the fd user's flush routine may return an event handle to wait on */
        reply->event = fd->fd_ops->flush( fd, &req->async, req->blocking );
        release_object( fd );
    }
}

2397 2398 2399
/* open a file object */
DECL_HANDLER(open_file_object)
{
    struct unicode_str name = get_req_unicode_str();
    struct object *obj, *result, *root = NULL;

    /* resolve the optional root directory handle */
    if (req->rootdir && !(root = get_handle_obj( current->process, req->rootdir, 0, NULL ))) return;

    obj = open_named_object( root, NULL, &name, req->attributes );
    if (root) release_object( root );
    if (!obj) return;  /* error already set by open_named_object */

    /* let the object create the opened instance, then allocate a handle to it */
    if ((result = obj->ops->open_file( obj, req->access, req->sharing, req->options )))
    {
        reply->handle = alloc_handle( current->process, result, req->access, req->attributes );
        release_object( result );
    }
    release_object( obj );
}

2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430
/* get the Unix name from a file handle */
DECL_HANDLER(get_handle_unix_name)
{
    struct fd *fd;

    if ((fd = get_handle_fd_obj( current->process, req->handle, 0 )))
    {
        if (fd->unix_name)
        {
            data_size_t name_len = strlen( fd->unix_name );
            /* always report the length so the client can retry with a larger buffer */
            reply->name_len = name_len;
            if (name_len <= get_reply_max_size()) set_reply_data( fd->unix_name, name_len );
            else set_error( STATUS_BUFFER_OVERFLOW );
        }
        else set_error( STATUS_OBJECT_TYPE_MISMATCH );
        release_object( fd );
    }
}

2436 2437 2438 2439 2440
/* get a Unix fd to access a file */
DECL_HANDLER(get_handle_fd)
{
    struct fd *fd;

    if ((fd = get_handle_fd_obj( current->process, req->handle, 0 )))
    {
        int unix_fd = get_unix_fd( fd );
        if (unix_fd != -1)
        {
            reply->type = fd->fd_ops->get_fd_type( fd );
            reply->cacheable = fd->cacheable;
            reply->options = fd->options;
            reply->access = get_handle_access( current->process, req->handle );
            /* pass the actual unix fd over the server socket to the client */
            send_client_fd( current->process, unix_fd, req->handle );
        }
        release_object( fd );
    }
}

2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481
/* perform a read on a file object */
DECL_HANDLER(read)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->async.handle, FILE_READ_DATA );

    if (fd)
    {
        /* the fd user's read routine may return a handle to wait on */
        reply->wait    = fd->fd_ops->read( fd, &req->async, req->blocking, req->pos );
        reply->options = fd->options;
        release_object( fd );
    }
}

/* perform a write on a file object */
DECL_HANDLER(write)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->async.handle, FILE_WRITE_DATA );

    if (fd)
    {
        /* reply->size receives the number of bytes written synchronously */
        reply->wait    = fd->fd_ops->write( fd, &req->async, req->blocking, req->pos, &reply->size );
        reply->options = fd->options;
        release_object( fd );
    }
}

2482 2483 2484 2485
/* perform an ioctl on a file */
DECL_HANDLER(ioctl)
{
    /* bits 14-15 of an NT ioctl code encode the required access (CTL_CODE layout) */
    unsigned int access = (req->code >> 14) & (FILE_READ_DATA|FILE_WRITE_DATA);
    struct fd *fd = get_handle_fd_obj( current->process, req->async.handle, access );

    if (fd)
    {
        reply->wait    = fd->fd_ops->ioctl( fd, req->code, &req->async, req->blocking );
        reply->options = fd->options;
        release_object( fd );
    }
}

2496 2497 2498
/* create / reschedule an async I/O */
DECL_HANDLER(register_async)
{
    unsigned int access;
    struct fd *fd;

    /* map the async type to the access right the handle must have */
    switch(req->type)
    {
    case ASYNC_TYPE_READ:
        access = FILE_READ_DATA;
        break;
    case ASYNC_TYPE_WRITE:
        access = FILE_WRITE_DATA;
        break;
    default:
        set_error( STATUS_INVALID_PARAMETER );
        return;
    }

    if ((fd = get_handle_fd_obj( current->process, req->async.handle, access )))
    {
        /* only queue the async if the fd has a usable unix fd */
        if (get_unix_fd( fd ) != -1) fd->fd_ops->queue_async( fd, &req->async, req->type, req->count );
        release_object( fd );
    }
}
2521 2522 2523 2524 2525

/* cancels all async I/O */
DECL_HANDLER(cancel_async)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->handle, 0 );
    /* restrict the cancel to the calling thread when requested */
    struct thread *thread = req->only_thread ? current : NULL;

    if (fd)
    {
        int count = fd->fd_ops->cancel_async( fd, current->process, thread, req->iosb );
        /* when cancelling a specific iosb, report if nothing matched */
        if (!count && req->iosb) set_error( STATUS_NOT_FOUND );
        release_object( fd );
    }
}
2535 2536 2537 2538 2539 2540 2541 2542

/* attach completion object to a fd */
DECL_HANDLER(set_completion_info)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->handle, 0 );

    if (fd)
    {
        /* synchronous-only fds can't use completion ports, and an existing
         * association can't be replaced */
        if (!(fd->options & (FILE_SYNCHRONOUS_IO_ALERT | FILE_SYNCHRONOUS_IO_NONALERT)) && !fd->completion)
        {
            fd->completion = get_completion_obj( current->process, req->chandle, IO_COMPLETION_MODIFY_STATE );
            fd->comp_key = req->ckey;
        }
        else set_error( STATUS_INVALID_PARAMETER );
        release_object( fd );
    }
}
2552 2553 2554 2555 2556 2557 2558

/* push new completion msg into a completion queue attached to the fd */
DECL_HANDLER(add_fd_completion)
{
    struct fd *fd = get_handle_fd_obj( current->process, req->handle, 0 );
    if (fd)
    {
        /* silently ignored if no completion port is attached */
        if (fd->completion)
            add_completion( fd->completion, fd->comp_key, req->cvalue, req->status, req->information );
        release_object( fd );
    }
}
2564

2565 2566
/* set fd disposition information */
DECL_HANDLER(set_fd_disp_info)
{
    /* changing the delete disposition requires DELETE access on the handle */
    struct fd *fd = get_handle_fd_obj( current->process, req->handle, DELETE );
    if (fd)
    {
        set_fd_disposition( fd, req->unlink );
        release_object( fd );
    }
}
2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592

/* set fd name information */
DECL_HANDLER(set_fd_name_info)
{
    struct fd *fd, *root_fd = NULL;

    /* resolve the optional root directory handle to its fd */
    if (req->rootdir)
    {
        struct dir *root;

        if (!(root = get_dir_obj( current->process, req->rootdir, 0 ))) return;
        root_fd = get_obj_fd( (struct object *)root );
        release_object( root );
        if (!root_fd) return;
    }

    if ((fd = get_handle_fd_obj( current->process, req->handle, 0 )))
    {
        /* the request data is the (not NUL-terminated) target unix name */
        set_fd_name( fd, root_fd, get_req_data(), get_req_data_size(), req->link );
        release_object( fd );
    }
    if (root_fd) release_object( root_fd );
}