/*
 * Server-side socket management
 *
 * Copyright (C) 1999 Marcus Meissner, Ove Kåven
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 *
 * FIXME: we use read|write access in all cases. Shouldn't we depend that
 * on the access of the current handle?
 */

#include "config.h"

26 27
#include <assert.h>
#include <fcntl.h>
28
#include <stdarg.h>
29 30 31 32
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
33 34 35 36 37 38
#ifdef HAVE_IFADDRS_H
# include <ifaddrs.h>
#endif
#ifdef HAVE_NET_IF_H
# include <net/if.h>
#endif
39 40 41
#ifdef HAVE_NETINET_IN_H
# include <netinet/in.h>
#endif
42
#include <poll.h>
43 44
#include <sys/time.h>
#include <sys/types.h>
45
#include <sys/socket.h>
46
#include <sys/ioctl.h>
47 48 49
#ifdef HAVE_SYS_FILIO_H
# include <sys/filio.h>
#endif
50 51
#include <time.h>
#include <unistd.h>
52
#include <limits.h>
53 54 55
#ifdef HAVE_LINUX_FILTER_H
# include <linux/filter.h>
#endif
56 57 58
#ifdef HAVE_LINUX_RTNETLINK_H
# include <linux/rtnetlink.h>
#endif
59

60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
#ifdef HAVE_NETIPX_IPX_H
# include <netipx/ipx.h>
#elif defined(HAVE_LINUX_IPX_H)
# ifdef HAVE_ASM_TYPES_H
#  include <asm/types.h>
# endif
# ifdef HAVE_LINUX_TYPES_H
#  include <linux/types.h>
# endif
# include <linux/ipx.h>
#endif
#if defined(SOL_IPX) || defined(SO_DEFAULT_HEADERS)
# define HAS_IPX
#endif

#ifdef HAVE_LINUX_IRDA_H
# ifdef HAVE_LINUX_TYPES_H
#  include <linux/types.h>
# endif
# include <linux/irda.h>
# define HAS_IRDA
#endif

83 84
#include "ntstatus.h"
#define WIN32_NO_STATUS
85
#include "windef.h"
86
#include "winternl.h"
87
#include "winerror.h"
88 89
#define USE_WS_PREFIX
#include "winsock2.h"
90
#include "ws2tcpip.h"
91
#include "wsipx.h"
92
#include "af_irda.h"
93
#include "wine/afd.h"
94

95
#include "process.h"
96
#include "file.h"
97 98 99
#include "handle.h"
#include "thread.h"
#include "request.h"
100
#include "user.h"
101

102 103 104 105
#if defined(linux) && !defined(IP_UNICAST_IF)
#define IP_UNICAST_IF 50
#endif

106 107
/* magic loopback address 127.12.34.56; presumably used as a sentinel to
 * special-case certain loopback connections — TODO confirm against callers */
static const char magic_loopback_addr[] = {127, 12, 34, 56};

108 109 110 111 112 113 114 115 116
/* union large enough to hold any supported Windows socket address */
union win_sockaddr
{
    struct WS_sockaddr addr;
    struct WS_sockaddr_in in;
    struct WS_sockaddr_in6 in6;
    struct WS_sockaddr_ipx ipx;
    SOCKADDR_IRDA irda;
};

117 118 119 120 121 122 123 124
/* list of all pending AFD poll requests */
static struct list poll_list = LIST_INIT( poll_list );

/* a pending AFD poll ioctl, watching `count` sockets at once */
struct poll_req
{
    struct list entry;            /* entry in poll_list */
    struct async *async;          /* async object for this request */
    struct iosb *iosb;            /* I/O status block of the async */
    struct timeout_user *timeout; /* optional expiry callback */
    timeout_t orig_timeout;       /* timeout as requested; echoed back in the output */
    int exclusive;                /* exclusive flag; echoed back in the output */
    unsigned int count;           /* number of entries in sockets[] */
    struct
    {
        struct sock *sock;        /* referenced socket being watched */
        int mask;                 /* AFD_POLL_* events the caller asked for */
        obj_handle_t handle;      /* client-side handle, reported back in the output */
        int flags;                /* AFD_POLL_* events actually signaled */
        unsigned int status;      /* per-socket NTSTATUS reported back */
    } sockets[1];                 /* variable-length; allocated with `count` entries */
};

138 139 140 141
/* a pending (AcceptEx-style) accept request on a listening socket */
struct accept_req
{
    struct list entry;               /* entry in the listening socket's accept_list */
    struct async *async;             /* async object for this request */
    struct iosb *iosb;               /* I/O status block of the async */
    struct sock *sock, *acceptsock;  /* listening socket, and socket to accept into (may be NULL) */
    int accepted;                    /* has the connection already been accepted? */
    unsigned int recv_len, local_len; /* sizes of the initial-recv and local-address output areas */
};

148 149 150 151 152 153 154 155
/* a pending (ConnectEx-style) connect request */
struct connect_req
{
    struct async *async;  /* async object for this request */
    struct iosb *iosb;    /* I/O status block of the async */
    struct sock *sock;    /* socket being connected */
    unsigned int addr_len, send_len, send_cursor; /* address size, bytes to send, bytes sent so far */
};

156 157 158 159 160 161 162 163 164
/* connection lifecycle state of a socket */
enum connection_state
{
    SOCK_LISTENING,      /* listening for incoming connections */
    SOCK_UNCONNECTED,    /* connection-mode socket, not yet connected */
    SOCK_CONNECTING,     /* connect in progress */
    SOCK_CONNECTED,      /* connected */
    SOCK_CONNECTIONLESS, /* datagram-style socket with no connection state */
};

165 166 167
/* server-side representation of a Windows socket */
struct sock
{
    struct object       obj;         /* object header */
    struct fd          *fd;          /* socket file descriptor */
    enum connection_state state;     /* connection state */
    unsigned int        mask;        /* event mask */
    /* pending AFD_POLL_* events which have not yet been reported to the application */
    unsigned int        pending_events;
    /* AFD_POLL_* events which have already been reported and should not be
     * selected for again until reset by a relevant call.
     *
     * For example, if AFD_POLL_READ is set here and not in pending_events, it
     * has already been reported and consumed, and we should not report it
     * again, even if POLLIN is signaled, until it is reset by e.g recv().
     *
     * If an event has been signaled and not consumed yet, it will be set in
     * both pending_events and reported_events (as we should only ever report
     * any event once until it is reset.) */
    unsigned int        reported_events;
    unsigned int        flags;       /* socket flags */
    unsigned short      proto;       /* socket protocol */
    unsigned short      type;        /* socket type */
    unsigned short      family;      /* socket family */
    struct event       *event;       /* event object */
    user_handle_t       window;      /* window to send the message to */
    unsigned int        message;     /* message to send */
    obj_handle_t        wparam;      /* message wparam (socket handle) */
    int                 errors[AFD_POLL_BIT_COUNT]; /* event errors */
    timeout_t           connect_time;/* time the socket was connected */
    struct sock        *deferred;    /* socket that waits for a deferred accept */
    struct async_queue  read_q;      /* queue for asynchronous reads */
    struct async_queue  write_q;     /* queue for asynchronous writes */
    struct async_queue  ifchange_q;  /* queue for interface change notifications */
    struct async_queue  accept_q;    /* queue for asynchronous accepts */
    struct async_queue  connect_q;   /* queue for asynchronous connects */
    struct async_queue  poll_q;      /* queue for asynchronous polls */
    struct object      *ifchange_obj; /* the interface change notification object */
    struct list         ifchange_entry; /* entry in ifchange notification list */
    struct list         accept_list; /* list of pending accept requests */
    struct accept_req  *accept_recv_req; /* pending accept-into request which will recv on this socket */
    struct connect_req *connect_req; /* pending connection request */
    struct poll_req    *main_poll;   /* main poll */
    union win_sockaddr  addr;        /* socket name */
    int                 addr_len;    /* socket name length */
    unsigned int        rcvbuf;      /* advisory recv buffer size */
    unsigned int        sndbuf;      /* advisory send buffer size */
    unsigned int        rcvtimeo;    /* receive timeout in ms */
    unsigned int        sndtimeo;    /* send timeout in ms */
    unsigned int        rd_shutdown : 1; /* is the read end shut down? */
    unsigned int        wr_shutdown : 1; /* is the write end shut down? */
    unsigned int        wr_shutdown_pending : 1; /* is a write shutdown pending? */
    unsigned int        hangup : 1;  /* has the read end received a hangup? */
    unsigned int        aborted : 1; /* did we get a POLLERR or irregular POLLHUP? */
    unsigned int        nonblocking : 1; /* is the socket nonblocking? */
    unsigned int        bound : 1;   /* is the socket bound? */
};

/* object callbacks (sock_ops) */
static void sock_dump( struct object *obj, int verbose );
static struct fd *sock_get_fd( struct object *obj );
static int sock_close_handle( struct object *obj, struct process *process, obj_handle_t handle );
static void sock_destroy( struct object *obj );
static struct object *sock_get_ifchange( struct sock *sock );
static void sock_release_ifchange( struct sock *sock );

/* fd callbacks (sock_fd_ops) */
static int sock_get_poll_events( struct fd *fd );
static void sock_poll_event( struct fd *fd, int event );
static enum server_fd_type sock_get_fd_type( struct fd *fd );
static void sock_ioctl( struct fd *fd, ioctl_code_t code, struct async *async );
static void sock_cancel_async( struct fd *fd, struct async *async );
static void sock_queue_async( struct fd *fd, struct async *async, int type, int count );
static void sock_reselect_async( struct fd *fd, struct async_queue *queue );

/* helpers defined later in this file */
static int accept_into_socket( struct sock *sock, struct sock *acceptsock );
static struct sock *accept_socket( struct sock *sock );
static int sock_get_ntstatus( int err );
static unsigned int sock_get_error( int err );
static void poll_socket( struct sock *poll_sock, struct async *async, int exclusive, timeout_t timeout,
                         unsigned int count, const struct afd_poll_socket_64 *sockets );
243 244 245

/* object operations for socket objects */
static const struct object_ops sock_ops =
{
    sizeof(struct sock),          /* size */
    &file_type,                   /* type */
    sock_dump,                    /* dump */
    add_queue,                    /* add_queue */
    remove_queue,                 /* remove_queue */
    default_fd_signaled,          /* signaled */
    no_satisfied,                 /* satisfied */
    no_signal,                    /* signal */
    sock_get_fd,                  /* get_fd */
    default_map_access,           /* map_access */
    default_get_sd,               /* get_sd */
    default_set_sd,               /* set_sd */
    no_get_full_name,             /* get_full_name */
    no_lookup_name,               /* lookup_name */
    no_link_name,                 /* link_name */
    NULL,                         /* unlink_name */
    no_open_file,                 /* open_file */
    no_kernel_obj_list,           /* get_kernel_obj_list */
    sock_close_handle,            /* close_handle */
    sock_destroy                  /* destroy */
};

/* fd operations for socket objects; reads/writes go through sock_ioctl, so the
 * generic read/write/flush entries are the no-op defaults */
static const struct fd_ops sock_fd_ops =
{
    sock_get_poll_events,         /* get_poll_events */
    sock_poll_event,              /* poll_event */
    sock_get_fd_type,             /* get_fd_type */
    no_fd_read,                   /* read */
    no_fd_write,                  /* write */
    no_fd_flush,                  /* flush */
    default_fd_get_file_info,     /* get_file_info */
    no_fd_get_volume_info,        /* get_volume_info */
    sock_ioctl,                   /* ioctl */
    sock_cancel_async,            /* cancel_async */
    sock_queue_async,             /* queue_async */
    sock_reselect_async           /* reselect_async */
};

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/* union large enough to hold any supported Unix socket address */
union unix_sockaddr
{
    struct sockaddr addr;
    struct sockaddr_in in;
    struct sockaddr_in6 in6;
#ifdef HAS_IPX
    struct sockaddr_ipx ipx;
#endif
#ifdef HAS_IRDA
    struct sockaddr_irda irda;
#endif
};

static int sockaddr_from_unix( const union unix_sockaddr *uaddr, struct WS_sockaddr *wsaddr, socklen_t wsaddrlen )
{
    memset( wsaddr, 0, wsaddrlen );

    switch (uaddr->addr.sa_family)
    {
    case AF_INET:
    {
        struct WS_sockaddr_in win = {0};

        if (wsaddrlen < sizeof(win)) return -1;
        win.sin_family = WS_AF_INET;
        win.sin_port = uaddr->in.sin_port;
        memcpy( &win.sin_addr, &uaddr->in.sin_addr, sizeof(win.sin_addr) );
        memcpy( wsaddr, &win, sizeof(win) );
        return sizeof(win);
    }

    case AF_INET6:
    {
        struct WS_sockaddr_in6 win = {0};

319
        if (wsaddrlen < sizeof(win)) return -1;
320 321 322 323 324 325 326
        win.sin6_family = WS_AF_INET6;
        win.sin6_port = uaddr->in6.sin6_port;
        win.sin6_flowinfo = uaddr->in6.sin6_flowinfo;
        memcpy( &win.sin6_addr, &uaddr->in6.sin6_addr, sizeof(win.sin6_addr) );
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
        win.sin6_scope_id = uaddr->in6.sin6_scope_id;
#endif
327 328
        memcpy( wsaddr, &win, sizeof(win) );
        return sizeof(win);
329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
    }

#ifdef HAS_IPX
    case AF_IPX:
    {
        struct WS_sockaddr_ipx win = {0};

        if (wsaddrlen < sizeof(win)) return -1;
        win.sa_family = WS_AF_IPX;
        memcpy( win.sa_netnum, &uaddr->ipx.sipx_network, sizeof(win.sa_netnum) );
        memcpy( win.sa_nodenum, &uaddr->ipx.sipx_node, sizeof(win.sa_nodenum) );
        win.sa_socket = uaddr->ipx.sipx_port;
        memcpy( wsaddr, &win, sizeof(win) );
        return sizeof(win);
    }
#endif

#ifdef HAS_IRDA
    case AF_IRDA:
    {
        SOCKADDR_IRDA win;

        if (wsaddrlen < sizeof(win)) return -1;
        win.irdaAddressFamily = WS_AF_IRDA;
        memcpy( win.irdaDeviceID, &uaddr->irda.sir_addr, sizeof(win.irdaDeviceID) );
        if (uaddr->irda.sir_lsap_sel != LSAP_ANY)
            snprintf( win.irdaServiceName, sizeof(win.irdaServiceName), "LSAP-SEL%u", uaddr->irda.sir_lsap_sel );
        else
            memcpy( win.irdaServiceName, uaddr->irda.sir_name, sizeof(win.irdaServiceName) );
        memcpy( wsaddr, &win, sizeof(win) );
        return sizeof(win);
    }
#endif

    case AF_UNSPEC:
        return 0;

    default:
        return -1;

    }
}
371

372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
/* Convert a Windows socket address to its Unix equivalent in *uaddr.
 * Returns the Unix address length, or 0 if the input is too short or the
 * family is unsupported.  For WS_AF_UNSPEC only a length matching the
 * apparent address size is returned; *uaddr stays zeroed. */
static socklen_t sockaddr_to_unix( const struct WS_sockaddr *wsaddr, int wsaddrlen, union unix_sockaddr *uaddr )
{
    memset( uaddr, 0, sizeof(*uaddr) );

    switch (wsaddr->sa_family)
    {
    case WS_AF_INET:
    {
        struct WS_sockaddr_in win = {0};

        if (wsaddrlen < sizeof(win)) return 0;
        memcpy( &win, wsaddr, sizeof(win) );
        uaddr->in.sin_family = AF_INET;
        uaddr->in.sin_port = win.sin_port;
        memcpy( &uaddr->in.sin_addr, &win.sin_addr, sizeof(win.sin_addr) );
        return sizeof(uaddr->in);
    }

    case WS_AF_INET6:
    {
        struct WS_sockaddr_in6 win = {0};

        if (wsaddrlen < sizeof(win)) return 0;
        memcpy( &win, wsaddr, sizeof(win) );
        uaddr->in6.sin6_family = AF_INET6;
        uaddr->in6.sin6_port = win.sin6_port;
        uaddr->in6.sin6_flowinfo = win.sin6_flowinfo;
        memcpy( &uaddr->in6.sin6_addr, &win.sin6_addr, sizeof(win.sin6_addr) );
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
        uaddr->in6.sin6_scope_id = win.sin6_scope_id;
#endif
        return sizeof(uaddr->in6);
    }

#ifdef HAS_IPX
    case WS_AF_IPX:
    {
        struct WS_sockaddr_ipx win = {0};

        if (wsaddrlen < sizeof(win)) return 0;
        memcpy( &win, wsaddr, sizeof(win) );
        uaddr->ipx.sipx_family = AF_IPX;
        memcpy( &uaddr->ipx.sipx_network, win.sa_netnum, sizeof(win.sa_netnum) );
        memcpy( &uaddr->ipx.sipx_node, win.sa_nodenum, sizeof(win.sa_nodenum) );
        uaddr->ipx.sipx_port = win.sa_socket;
        return sizeof(uaddr->ipx);
    }
#endif

#ifdef HAS_IRDA
    case WS_AF_IRDA:
    {
        SOCKADDR_IRDA win = {0};
        unsigned int lsap_sel;

        if (wsaddrlen < sizeof(win)) return 0;
        memcpy( &win, wsaddr, sizeof(win) );
        uaddr->irda.sir_family = AF_IRDA;
        /* a service name of the form "LSAP-SELn" encodes a raw LSAP selector */
        if (sscanf( win.irdaServiceName, "LSAP-SEL%u", &lsap_sel ) == 1)
            uaddr->irda.sir_lsap_sel = lsap_sel;
        else
        {
            uaddr->irda.sir_lsap_sel = LSAP_ANY;
            memcpy( uaddr->irda.sir_name, win.irdaServiceName, sizeof(win.irdaServiceName) );
        }
        memcpy( &uaddr->irda.sir_addr, win.irdaDeviceID, sizeof(win.irdaDeviceID) );
        return sizeof(uaddr->irda);
    }
#endif

    case WS_AF_UNSPEC:
        /* guess the address size from the given length */
        switch (wsaddrlen)
        {
        default: /* likely an ipv4 address */
        case sizeof(struct WS_sockaddr_in):
            return sizeof(uaddr->in);

#ifdef HAS_IPX
        case sizeof(struct WS_sockaddr_ipx):
            return sizeof(uaddr->ipx);
#endif

#ifdef HAS_IRDA
        case sizeof(SOCKADDR_IRDA):
            return sizeof(uaddr->irda);
#endif

        case sizeof(struct WS_sockaddr_in6):
            return sizeof(uaddr->in6);
        }

    default:
        return 0;
    }
}

468 469 470 471 472 473 474 475 476 477 478 479 480
/* some events are generated at the same time but must be sent in a particular
 * order (e.g. CONNECT must be sent before READ) */
static const enum afd_poll_bit event_bitorder[] =
{
    AFD_POLL_BIT_CONNECT,
    AFD_POLL_BIT_CONNECT_ERR,
    AFD_POLL_BIT_ACCEPT,
    AFD_POLL_BIT_OOB,
    AFD_POLL_BIT_WRITE,
    AFD_POLL_BIT_READ,
    AFD_POLL_BIT_RESET,
    AFD_POLL_BIT_HUP,
    AFD_POLL_BIT_CLOSE,
};

483 484 485 486 487 488 489 490
/* how the platform reports reading from the peer of a shut-down socket */
typedef enum {
    SOCK_SHUTDOWN_ERROR = -1,   /* detection failed / not yet determined */
    SOCK_SHUTDOWN_EOF = 0,      /* read() reports EOF */
    SOCK_SHUTDOWN_POLLHUP = 1   /* poll() reports POLLHUP */
} sock_shutdown_t;

/* platform behaviour, detected in sock_init() via sock_check_pollhup() */
static sock_shutdown_t sock_shutdown_type = SOCK_SHUTDOWN_ERROR;

491
static sock_shutdown_t sock_check_pollhup(void)
492 493 494 495 496 497
{
    sock_shutdown_t ret = SOCK_SHUTDOWN_ERROR;
    int fd[2], n;
    struct pollfd pfd;
    char dummy;

498
    if ( socketpair( AF_UNIX, SOCK_STREAM, 0, fd ) ) return ret;
499
    if ( shutdown( fd[0], 1 ) ) goto out;
500 501 502 503 504

    pfd.fd = fd[1];
    pfd.events = POLLIN;
    pfd.revents = 0;

505 506
    /* Solaris' poll() sometimes returns nothing if given a 0ms timeout here */
    n = poll( &pfd, 1, 1 );
507 508 509 510
    if ( n != 1 ) goto out; /* error or timeout */
    if ( pfd.revents & POLLHUP )
        ret = SOCK_SHUTDOWN_POLLHUP;
    else if ( pfd.revents & POLLIN &&
511
              read( fd[1], &dummy, 1 ) == 0 )
512 513 514
        ret = SOCK_SHUTDOWN_EOF;

out:
515 516
    close( fd[0] );
    close( fd[1] );
517 518 519 520 521
    return ret;
}

/* detect the platform's socket-shutdown behaviour; presumably called once at
 * server startup — confirm against the caller */
void sock_init(void)
{
    sock_shutdown_type = sock_check_pollhup();

    switch ( sock_shutdown_type )
    {
    case SOCK_SHUTDOWN_EOF:
        if (debug_level) fprintf( stderr, "sock_init: shutdown() causes EOF\n" );
        break;
    case SOCK_SHUTDOWN_POLLHUP:
        if (debug_level) fprintf( stderr, "sock_init: shutdown() causes POLLHUP\n" );
        break;
    default:
        /* detection failed; fall back to the EOF behaviour */
        fprintf( stderr, "sock_init: ERROR in sock_check_pollhup()\n" );
        sock_shutdown_type = SOCK_SHUTDOWN_EOF;
    }
}
537

538
/* recompute the poll events this socket needs and update the main loop's
 * interest set; returns the new event mask */
static int sock_reselect( struct sock *sock )
{
    int ev = sock_get_poll_events( sock->fd );

    if (debug_level)
        fprintf(stderr,"sock_reselect(%p): new mask %x\n", sock, ev);

    set_fd_events( sock->fd, ev );
    return ev;
}

549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573
/* translate a mask of AFD_POLL_* flags into the corresponding WSAAsyncSelect
 * FD_* network event mask */
static unsigned int afd_poll_flag_to_win32( unsigned int flags )
{
    /* indexed by AFD_POLL_BIT_* position */
    static const unsigned int map[] =
    {
        FD_READ,    /* READ */
        FD_OOB,     /* OOB */
        FD_WRITE,   /* WRITE */
        FD_CLOSE,   /* HUP */
        FD_CLOSE,   /* RESET */
        0,          /* CLOSE */
        FD_CONNECT, /* CONNECT */
        FD_ACCEPT,  /* ACCEPT */
        FD_CONNECT, /* CONNECT_ERR */
    };

    unsigned int bit, win32_flags = 0;

    for (bit = 0; bit < ARRAY_SIZE(map); ++bit)
        if (flags & (1 << bit)) win32_flags |= map[bit];

    return win32_flags;
}

574
/* wake anybody waiting on the socket event or send the associated message */
static void sock_wake_up( struct sock *sock )
{
    /* only report events the application actually selected for */
    unsigned int events = sock->pending_events & sock->mask;
    int i;

    if (sock->event)
    {
        if (debug_level) fprintf(stderr, "signalling events %x ptr %p\n", events, sock->event );
        if (events)
            set_event( sock->event );
    }
    if (sock->window)
    {
        if (debug_level) fprintf(stderr, "signalling events %x win %08x\n", events, sock->window );
        /* post one message per event, in the required delivery order */
        for (i = 0; i < ARRAY_SIZE(event_bitorder); i++)
        {
            enum afd_poll_bit event = event_bitorder[i];
            if (events & (1 << event))
            {
                /* lparam encodes the FD_* event in the low word and the error in the high word */
                lparam_t lparam = afd_poll_flag_to_win32(1 << event) | (sock_get_error( sock->errors[event] ) << 16);
                post_message( sock->window, sock->message, sock->wparam, lparam );
            }
        }
        sock->pending_events = 0;
        sock_reselect( sock );
    }
}

603
/* fetch the pending socket error via SO_ERROR; returns 0 if there is none
 * (or if getsockopt() itself fails, since optval then stays 0) */
static inline int sock_error( struct fd *fd )
{
    unsigned int optval = 0;
    socklen_t optlen = sizeof(optval);

    getsockopt( get_unix_fd(fd), SOL_SOCKET, SO_ERROR, (void *) &optval, &optlen);
    return optval;
}

612
/* async completion callback: unlink an accept request and drop all the
 * references it holds */
static void free_accept_req( void *private )
{
    struct accept_req *req = private;
    list_remove( &req->entry );
    if (req->acceptsock)
    {
        /* accept-into case: the target socket no longer has a pending recv */
        req->acceptsock->accept_recv_req = NULL;
        release_object( req->acceptsock );
    }
    release_object( req->async );
    release_object( req->iosb );
    release_object( req->sock );
    free( req );
}

627
static void fill_accept_output( struct accept_req *req )
628
{
629 630
    const data_size_t out_size = req->iosb->out_size;
    struct async *async = req->async;
631 632
    union unix_sockaddr unix_addr;
    struct WS_sockaddr *win_addr;
633
    unsigned int remote_len;
634 635 636 637 638
    socklen_t unix_len;
    int fd, size = 0;
    char *out_data;
    int win_len;

639 640 641 642 643
    if (!(out_data = mem_alloc( out_size )))
    {
        async_terminate( async, get_error() );
        return;
    }
644 645 646 647 648 649 650 651 652 653 654 655

    fd = get_unix_fd( req->acceptsock->fd );

    if (req->recv_len && (size = recv( fd, out_data, req->recv_len, 0 )) < 0)
    {
        if (!req->accepted && errno == EWOULDBLOCK)
        {
            req->accepted = 1;
            sock_reselect( req->acceptsock );
            return;
        }

656
        async_terminate( async, sock_get_ntstatus( errno ) );
657 658 659 660 661 662 663 664
        free( out_data );
        return;
    }

    if (req->local_len)
    {
        if (req->local_len < sizeof(int))
        {
665
            async_terminate( async, STATUS_BUFFER_TOO_SMALL );
666 667 668 669 670 671 672
            free( out_data );
            return;
        }

        unix_len = sizeof(unix_addr);
        win_addr = (struct WS_sockaddr *)(out_data + req->recv_len + sizeof(int));
        if (getsockname( fd, &unix_addr.addr, &unix_len ) < 0 ||
673
            (win_len = sockaddr_from_unix( &unix_addr, win_addr, req->local_len - sizeof(int) )) < 0)
674
        {
675
            async_terminate( async, sock_get_ntstatus( errno ) );
676 677 678 679 680 681 682 683
            free( out_data );
            return;
        }
        memcpy( out_data + req->recv_len, &win_len, sizeof(int) );
    }

    unix_len = sizeof(unix_addr);
    win_addr = (struct WS_sockaddr *)(out_data + req->recv_len + req->local_len + sizeof(int));
684
    remote_len = out_size - req->recv_len - req->local_len;
685
    if (getpeername( fd, &unix_addr.addr, &unix_len ) < 0 ||
686
        (win_len = sockaddr_from_unix( &unix_addr, win_addr, remote_len - sizeof(int) )) < 0)
687
    {
688
        async_terminate( async, sock_get_ntstatus( errno ) );
689 690 691 692 693
        free( out_data );
        return;
    }
    memcpy( out_data + req->recv_len + req->local_len, &win_len, sizeof(int) );

694
    async_request_complete( req->async, STATUS_SUCCESS, size, out_size, out_data );
695 696 697 698 699 700 701 702 703
}

/* complete a pending accept request once the listening socket is readable;
 * either accepts into the supplied socket or creates a new one and returns
 * its handle to the client */
static void complete_async_accept( struct sock *sock, struct accept_req *req )
{
    struct sock *acceptsock = req->acceptsock;
    struct async *async = req->async;

    if (debug_level) fprintf( stderr, "completing accept request for socket %p\n", sock );

    if (acceptsock)
    {
        /* accept-into: reuse the caller-provided socket */
        if (!accept_into_socket( sock, acceptsock ))
        {
            async_terminate( async, get_error() );
            return;
        }
        fill_accept_output( req );
    }
    else
    {
        obj_handle_t handle;

        if (!(acceptsock = accept_socket( sock )))
        {
            async_terminate( async, get_error() );
            return;
        }
        handle = alloc_handle_no_access_check( async_get_thread( async )->process, &acceptsock->obj,
                                               GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE, OBJ_INHERIT );
        acceptsock->wparam = handle;
        sock_reselect( acceptsock );
        /* the handle (if any) now holds the reference */
        release_object( acceptsock );
        if (!handle)
        {
            async_terminate( async, get_error() );
            return;
        }

        async_request_complete_alloc( req->async, STATUS_SUCCESS, 0, sizeof(handle), &handle );
    }
}

/* complete a pending accept-into request once the accepted socket has data */
static void complete_async_accept_recv( struct accept_req *req )
{
    if (debug_level) fprintf( stderr, "completing accept recv request for socket %p\n", req->acceptsock );

    /* only requests with a recv area are queued on the accepted socket */
    assert( req->recv_len );

    fill_accept_output( req );
}

746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
/* async completion callback: detach a connect request from its socket and
 * drop all the references it holds */
static void free_connect_req( void *private )
{
    struct connect_req *req = private;

    req->sock->connect_req = NULL;
    release_object( req->async );
    release_object( req->iosb );
    release_object( req->sock );
    free( req );
}

static void complete_async_connect( struct sock *sock )
{
    struct connect_req *req = sock->connect_req;
    const char *in_buffer;
    size_t len;
    int ret;

    if (debug_level) fprintf( stderr, "completing connect request for socket %p\n", sock );

766
    sock->state = SOCK_CONNECTED;
767 768 769

    if (!req->send_len)
    {
770
        async_terminate( req->async, STATUS_SUCCESS );
771 772 773
        return;
    }

774
    in_buffer = (const char *)req->iosb->in_data + sizeof(struct afd_connect_params) + req->addr_len;
775 776 777 778
    len = req->send_len - req->send_cursor;

    ret = send( get_unix_fd( sock->fd ), in_buffer + req->send_cursor, len, 0 );
    if (ret < 0 && errno != EWOULDBLOCK)
779
        async_terminate( req->async, sock_get_ntstatus( errno ) );
780
    else if (ret == len)
781
        async_request_complete( req->async, STATUS_SUCCESS, req->send_len, 0, NULL );
782 783 784 785
    else
        req->send_cursor += ret;
}

786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
/* async completion callback: cancel the timeout and drop all references held
 * by a poll request */
static void free_poll_req( void *private )
{
    struct poll_req *req = private;
    unsigned int i;

    if (req->timeout) remove_timeout_user( req->timeout );

    for (i = 0; i < req->count; ++i)
        release_object( req->sockets[i].sock );
    release_object( req->async );
    release_object( req->iosb );
    list_remove( &req->entry );
    free( req );
}

/* does this socket have SO_OOBINLINE set (OOB data delivered in-band)? */
static int is_oobinline( struct sock *sock )
{
    int oobinline;
    socklen_t len = sizeof(oobinline);
    return !getsockopt( get_unix_fd( sock->fd ), SOL_SOCKET, SO_OOBINLINE, (char *)&oobinline, &len ) && oobinline;
}

/* translate Unix poll() events into AFD_POLL_* flags, taking the socket's
 * connection state into account */
static int get_poll_flags( struct sock *sock, int event )
{
    int flags = 0;

    /* A connection-mode socket which has never been connected does not return
     * write or hangup events, but Linux reports POLLOUT | POLLHUP. */
    if (sock->state == SOCK_UNCONNECTED)
        event &= ~(POLLOUT | POLLHUP);

    if (event & POLLIN)
    {
        /* readable on a listening socket means an incoming connection */
        if (sock->state == SOCK_LISTENING)
            flags |= AFD_POLL_ACCEPT;
        else
            flags |= AFD_POLL_READ;
    }
    if (event & POLLPRI)
        flags |= is_oobinline( sock ) ? AFD_POLL_READ : AFD_POLL_OOB;
    if (event & POLLOUT)
        flags |= AFD_POLL_WRITE;
    if (sock->state == SOCK_CONNECTED)
        flags |= AFD_POLL_CONNECT;
    if (event & POLLHUP)
        flags |= AFD_POLL_HUP;
    if (event & POLLERR)
        flags |= AFD_POLL_CONNECT_ERR;

    return flags;
}

838 839
/* complete a poll request with the given status, reporting the signaled
 * sockets back to the client in the 64-bit or 32-bit parameter layout
 * matching the client process machine */
static void complete_async_poll( struct poll_req *req, unsigned int status )
{
    unsigned int i, signaled_count = 0;

    /* detach this request from any socket that considers it its main poll */
    for (i = 0; i < req->count; ++i)
    {
        struct sock *sock = req->sockets[i].sock;

        if (sock->main_poll == req)
            sock->main_poll = NULL;
    }

    /* only a successful poll reports per-socket results */
    if (!status)
    {
        for (i = 0; i < req->count; ++i)
        {
            if (req->sockets[i].flags)
                ++signaled_count;
        }
    }

    if (is_machine_64bit( async_get_thread( req->async )->process->machine ))
    {
        size_t output_size = offsetof( struct afd_poll_params_64, sockets[signaled_count] );
        struct afd_poll_params_64 *output;

        if (!(output = mem_alloc( output_size )))
        {
            async_terminate( req->async, get_error() );
            return;
        }
        memset( output, 0, output_size );
        output->timeout = req->orig_timeout;
        output->exclusive = req->exclusive;
        for (i = 0; i < req->count; ++i)
        {
            if (!req->sockets[i].flags) continue;
            output->sockets[output->count].socket = req->sockets[i].handle;
            output->sockets[output->count].flags = req->sockets[i].flags;
            output->sockets[output->count].status = req->sockets[i].status;
            ++output->count;
        }
        assert( output->count == signaled_count );

        async_request_complete( req->async, status, output_size, output_size, output );
    }
    else
    {
        /* same as above, with the 32-bit parameter layout */
        size_t output_size = offsetof( struct afd_poll_params_32, sockets[signaled_count] );
        struct afd_poll_params_32 *output;

        if (!(output = mem_alloc( output_size )))
        {
            async_terminate( req->async, get_error() );
            return;
        }
        memset( output, 0, output_size );
        output->timeout = req->orig_timeout;
        output->exclusive = req->exclusive;
        for (i = 0; i < req->count; ++i)
        {
            if (!req->sockets[i].flags) continue;
            output->sockets[output->count].socket = req->sockets[i].handle;
            output->sockets[output->count].flags = req->sockets[i].flags;
            output->sockets[output->count].status = req->sockets[i].status;
            ++output->count;
        }
        assert( output->count == signaled_count );

        async_request_complete( req->async, status, output_size, output_size, output );
    }
}

911 912 913 914 915 916 917 918 919
/* Complete any pending poll request that is waiting on one of the events that
 * just fired on "sock". "event" is the raw poll() event mask; "error" is the
 * Unix errno associated with it (0 if none). */
static void complete_async_polls( struct sock *sock, int event, int error )
{
    int flags = get_poll_flags( sock, event );
    struct poll_req *req, *next;

    /* SAFE iteration: complete_async_poll() can remove the request from poll_list */
    LIST_FOR_EACH_ENTRY_SAFE( req, next, &poll_list, struct poll_req, entry )
    {
        unsigned int i;

        if (req->iosb->status != STATUS_PENDING) continue;

        for (i = 0; i < req->count; ++i)
        {
            if (req->sockets[i].sock != sock) continue;
            /* only complete if the request asked for one of the fired flags */
            if (!(req->sockets[i].mask & flags)) continue;

            if (debug_level)
                fprintf( stderr, "completing poll for socket %p, wanted %#x got %#x\n",
                         sock, req->sockets[i].mask, flags );

            /* report only the subset of flags this entry asked for */
            req->sockets[i].flags = req->sockets[i].mask & flags;
            req->sockets[i].status = sock_get_ntstatus( error );

            complete_async_poll( req, STATUS_SUCCESS );
            break;
        }
    }
}

/* Timeout callback for a poll request: complete it with STATUS_TIMEOUT if it
 * is still pending. "private" is the struct poll_req registered with the timer. */
static void async_poll_timeout( void *private )
{
    struct poll_req *req = private;

    /* the timer has fired, so it no longer needs to be cancelled */
    req->timeout = NULL;

    if (req->iosb->status != STATUS_PENDING) return;

    complete_async_poll( req, STATUS_TIMEOUT );
}

951
/* Dispatch poll() events to the asyncs queued on this socket (accepts,
 * connects, reads, writes), consuming the event bits that were handled.
 * Returns the remaining (unconsumed) event mask, which the caller then
 * translates into AFD events via sock_dispatch_events(). */
static int sock_dispatch_asyncs( struct sock *sock, int event, int error )
{
    if (event & (POLLIN | POLLPRI))
    {
        struct accept_req *req;

        /* complete at most one pending accept per POLLIN */
        LIST_FOR_EACH_ENTRY( req, &sock->accept_list, struct accept_req, entry )
        {
            if (req->iosb->status == STATUS_PENDING && !req->accepted)
            {
                complete_async_accept( sock, req );
                break;
            }
        }

        if (sock->accept_recv_req && sock->accept_recv_req->iosb->status == STATUS_PENDING)
            complete_async_accept_recv( sock->accept_recv_req );
    }

    if ((event & POLLOUT) && sock->connect_req && sock->connect_req->iosb->status == STATUS_PENDING)
        complete_async_connect( sock );

    if (event & (POLLIN | POLLPRI) && async_waiting( &sock->read_q ))
    {
        if (debug_level) fprintf( stderr, "activating read queue for socket %p\n", sock );
        async_wake_up( &sock->read_q, STATUS_ALERTED );
        event &= ~(POLLIN | POLLPRI); /* consumed by the read queue */
    }

    if (event & POLLOUT && async_waiting( &sock->write_q ))
    {
        if (debug_level) fprintf( stderr, "activating write queue for socket %p\n", sock );
        async_wake_up( &sock->write_q, STATUS_ALERTED );
        event &= ~POLLOUT; /* consumed by the write queue */
    }

    if (event & (POLLERR | POLLHUP))
    {
        int status = sock_get_ntstatus( error );
        struct accept_req *req, *next;

        /* fail queued I/O only on sides that are already shut down or hung up */
        if (sock->rd_shutdown || sock->hangup)
            async_wake_up( &sock->read_q, status );
        if (sock->wr_shutdown)
            async_wake_up( &sock->write_q, status );

        /* SAFE iteration: async_terminate() may unlink the request */
        LIST_FOR_EACH_ENTRY_SAFE( req, next, &sock->accept_list, struct accept_req, entry )
        {
            if (req->iosb->status == STATUS_PENDING)
                async_terminate( req->async, status );
        }

        if (sock->accept_recv_req && sock->accept_recv_req->iosb->status == STATUS_PENDING)
            async_terminate( sock->accept_recv_req->async, status );

        if (sock->connect_req)
            async_terminate( sock->connect_req->async, status );
    }

    return event;
}

1013
static void post_socket_event( struct sock *sock, enum afd_poll_bit event_bit, int error )
1014 1015 1016
{
    unsigned int event = (1 << event_bit);

1017 1018 1019 1020 1021 1022
    if (!(sock->reported_events & event))
    {
        sock->pending_events |= event;
        sock->reported_events |= event;
        sock->errors[event_bit] = error;
    }
1023 1024
}

1025
/* Translate the leftover poll() events into AFD events according to the
 * connection state the socket was in BEFORE this poll iteration (prevstate),
 * then wake up anything waiting on the socket object. */
static void sock_dispatch_events( struct sock *sock, enum connection_state prevstate, int event, int error )
{
    switch (prevstate)
    {
    case SOCK_UNCONNECTED:
        break;

    case SOCK_CONNECTING:
        if (event & POLLOUT)
        {
            /* connection established; clear any stale connect error */
            post_socket_event( sock, AFD_POLL_BIT_CONNECT, 0 );
            sock->errors[AFD_POLL_BIT_CONNECT_ERR] = 0;
        }
        if (event & (POLLERR | POLLHUP))
            post_socket_event( sock, AFD_POLL_BIT_CONNECT_ERR, error );
        break;

    case SOCK_LISTENING:
        /* errors on a listening socket are reported through the accept event */
        if (event & (POLLIN | POLLERR | POLLHUP))
            post_socket_event( sock, AFD_POLL_BIT_ACCEPT, error );
        break;

    case SOCK_CONNECTED:
    case SOCK_CONNECTIONLESS:
        if (event & POLLIN)
            post_socket_event( sock, AFD_POLL_BIT_READ, 0 );

        if (event & POLLOUT)
            post_socket_event( sock, AFD_POLL_BIT_WRITE, 0 );

        if (event & POLLPRI)
            post_socket_event( sock, AFD_POLL_BIT_OOB, 0 );

        if (event & (POLLERR | POLLHUP))
            post_socket_event( sock, AFD_POLL_BIT_HUP, error );
        break;
    }

    sock_wake_up( sock );
}

1066
/* fd callback invoked when poll() reports activity on the socket.
 * Updates the connection state machine, completes/fails pending asyncs,
 * posts AFD events, and finally reselects the fd for the next poll set. */
static void sock_poll_event( struct fd *fd, int event )
{
    struct sock *sock = get_fd_user( fd );
    int hangup_seen = 0;
    enum connection_state prevstate = sock->state; /* state before this event, for sock_dispatch_events() */
    int error = 0;

    assert( sock->obj.ops == &sock_ops );
    if (debug_level)
        fprintf(stderr, "socket %p select event: %x\n", sock, event);

    /* we may change event later, remove from loop here */
    if (event & (POLLERR|POLLHUP)) set_fd_events( sock->fd, -1 );

    switch (sock->state)
    {
    case SOCK_UNCONNECTED:
        break;

    case SOCK_CONNECTING:
        if (event & (POLLERR|POLLHUP))
        {
            /* connect() failed; drop back to unconnected and record why */
            sock->state = SOCK_UNCONNECTED;
            event &= ~POLLOUT;
            error = sock_error( fd );
        }
        else if (event & POLLOUT)
        {
            /* POLLOUT on a connecting socket means the connect completed */
            sock->state = SOCK_CONNECTED;
            sock->connect_time = current_time;
        }
        break;

    case SOCK_LISTENING:
        if (event & (POLLERR|POLLHUP))
            error = sock_error( fd );
        break;

    case SOCK_CONNECTED:
    case SOCK_CONNECTIONLESS:
        if (sock->type == WS_SOCK_STREAM && (event & POLLIN))
        {
            char dummy;
            int nr;

            /* Linux 2.4 doesn't report POLLHUP if only one side of the socket
             * has been closed, so we need to check for it explicitly here */
            nr  = recv( get_unix_fd( fd ), &dummy, 1, MSG_PEEK );
            if ( nr == 0 )
            {
                /* zero-byte read on a stream socket means the peer shut down */
                hangup_seen = 1;
                event &= ~POLLIN;
            }
            else if ( nr < 0 )
            {
                event &= ~POLLIN;
                /* EAGAIN can happen if an async recv() falls between the server's poll()
                   call and the invocation of this routine */
                if ( errno != EAGAIN )
                {
                    error = errno;
                    event |= POLLERR;
                    if ( debug_level )
                        fprintf( stderr, "recv error on socket %p: %d\n", sock, errno );
                }
            }
        }

        if (hangup_seen || (sock_shutdown_type == SOCK_SHUTDOWN_POLLHUP && (event & POLLHUP)))
        {
            sock->hangup = 1;
        }
        else if (event & (POLLHUP | POLLERR))
        {
            /* error without a clean hangup: connection was aborted */
            sock->aborted = 1;

            if (debug_level)
                fprintf( stderr, "socket %p aborted by error %d, event %#x\n", sock, error, event );
        }

        if (hangup_seen)
            event |= POLLHUP;
        break;
    }

    complete_async_polls( sock, event, error );

    event = sock_dispatch_asyncs( sock, event, error );
    sock_dispatch_events( sock, prevstate, event, error );

    sock_reselect( sock );
}

static void sock_dump( struct object *obj, int verbose )
{
    struct sock *sock = (struct sock *)obj;
    assert( obj->ops == &sock_ops );
1163
    fprintf( stderr, "Socket fd=%p, state=%x, mask=%x, pending=%x, reported=%x\n",
1164
            sock->fd, sock->state,
1165
            sock->mask, sock->pending_events, sock->reported_events );
1166 1167
}

1168 1169 1170 1171 1172 1173 1174
static int poll_flags_from_afd( struct sock *sock, int flags )
{
    int ev = 0;

    /* A connection-mode socket which has never been connected does
     * not return write or hangup events, but Linux returns
     * POLLOUT | POLLHUP. */
1175
    if (sock->state == SOCK_UNCONNECTED)
1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189
        return -1;

    if (flags & (AFD_POLL_READ | AFD_POLL_ACCEPT))
        ev |= POLLIN;
    if ((flags & AFD_POLL_HUP) && sock->type == WS_SOCK_STREAM)
        ev |= POLLIN;
    if (flags & AFD_POLL_OOB)
        ev |= is_oobinline( sock ) ? POLLIN : POLLPRI;
    if (flags & AFD_POLL_WRITE)
        ev |= POLLOUT;

    return ev;
}

1190
/* fd callback: compute the poll() event mask the server should wait for on
 * this socket, based on its state, the client's event mask, queued asyncs,
 * and any outstanding poll requests. Returns -1 to exclude the socket from
 * polling entirely. */
static int sock_get_poll_events( struct fd *fd )
{
    struct sock *sock = get_fd_user( fd );
    /* only events the client asked for and that haven't been reported yet */
    unsigned int mask = sock->mask & ~sock->reported_events;
    struct poll_req *req;
    int ev = 0;

    assert( sock->obj.ops == &sock_ops );

    if (!sock->type) /* not initialized yet */
        return -1;

    switch (sock->state)
    {
    case SOCK_UNCONNECTED:
        /* A connection-mode Windows socket which has never been connected does
         * not return any events, but Linux returns POLLOUT | POLLHUP. Hence we
         * need to return -1 here, to prevent the socket from being polled on at
         * all. */
        return -1;

    case SOCK_CONNECTING:
        /* POLLOUT signals connect() completion */
        return POLLOUT;

    case SOCK_LISTENING:
        if (!list_empty( &sock->accept_list ) || (mask & AFD_POLL_ACCEPT))
            ev |= POLLIN;
        break;

    case SOCK_CONNECTED:
    case SOCK_CONNECTIONLESS:
        if (sock->hangup && sock->wr_shutdown && !sock->wr_shutdown_pending)
        {
            /* Linux returns POLLHUP if a socket is both SHUT_RD and SHUT_WR, or
             * if both the socket and its peer are SHUT_WR.
             *
             * We don't use SHUT_RD, so we can only encounter this in the latter
             * case. In that case there can't be any pending read requests (they
             * would have already been completed with a length of zero), the
             * above condition ensures that we don't have any pending write
             * requests, and nothing that can change about the socket state that
             * would complete a pending poll request. */
            return -1;
        }

        if (sock->aborted)
            return -1;

        if (sock->accept_recv_req)
        {
            ev |= POLLIN;
        }
        else if (async_queued( &sock->read_q ))
        {
            /* async reads own POLLIN/POLLPRI; only poll if one is waiting */
            if (async_waiting( &sock->read_q )) ev |= POLLIN | POLLPRI;
        }
        else
        {
            /* Don't ask for POLLIN if we got a hangup. We won't receive more
             * data anyway, but we will get POLLIN if SOCK_SHUTDOWN_EOF. */
            if (!sock->hangup)
            {
                if (mask & AFD_POLL_READ)
                    ev |= POLLIN;
                if (mask & AFD_POLL_OOB)
                    ev |= POLLPRI;
            }

            /* We use POLLIN with 0 bytes recv() as hangup indication for stream sockets. */
            if (sock->state == SOCK_CONNECTED && (mask & AFD_POLL_HUP) && !(sock->reported_events & AFD_POLL_READ))
                ev |= POLLIN;
        }

        if (async_queued( &sock->write_q ))
        {
            if (async_waiting( &sock->write_q )) ev |= POLLOUT;
        }
        else if (!sock->wr_shutdown && (mask & AFD_POLL_WRITE))
        {
            ev |= POLLOUT;
        }

        break;
    }

    /* add whatever events outstanding poll requests on this socket need */
    LIST_FOR_EACH_ENTRY( req, &poll_list, struct poll_req, entry )
    {
        unsigned int i;

        for (i = 0; i < req->count; ++i)
        {
            if (req->sockets[i].sock != sock) continue;

            ev |= poll_flags_from_afd( sock, req->sockets[i].mask );
        }
    }

    return ev;
}

1290
/* fd callback: all sock objects are of socket type */
static enum server_fd_type sock_get_fd_type( struct fd *fd )
{
    return FD_TYPE_SOCKET;
}

1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317
/* fd callback: cancel an async on this socket. If the async belongs to a poll
 * request, first clear any main_poll back-pointers referencing that request,
 * then terminate it with STATUS_CANCELLED. */
static void sock_cancel_async( struct fd *fd, struct async *async )
{
    struct poll_req *poll;

    LIST_FOR_EACH_ENTRY( poll, &poll_list, struct poll_req, entry )
    {
        unsigned int idx;

        if (poll->async != async) continue;

        for (idx = 0; idx < poll->count; idx++)
        {
            struct sock *entry_sock = poll->sockets[idx].sock;

            if (entry_sock->main_poll == poll)
                entry_sock->main_poll = NULL;
        }
    }

    async_terminate( async, STATUS_CANCELLED );
}

1318
/* fd callback: queue a read or write async on the socket.
 * Fails with STATUS_PIPE_DISCONNECTED if the matching direction is already
 * shut down or the socket is not connected; otherwise queues the async,
 * reselects the fd, and reports STATUS_PENDING. */
static void sock_queue_async( struct fd *fd, struct async *async, int type, int count )
{
    struct sock *sock = get_fd_user( fd );
    struct async_queue *queue;

    assert( sock->obj.ops == &sock_ops );

    switch (type)
    {
    case ASYNC_TYPE_READ:
        if (sock->rd_shutdown)
        {
            set_error( STATUS_PIPE_DISCONNECTED );
            return;
        }
        queue = &sock->read_q;
        break;

    case ASYNC_TYPE_WRITE:
        if (sock->wr_shutdown)
        {
            set_error( STATUS_PIPE_DISCONNECTED );
            return;
        }
        queue = &sock->write_q;
        break;

    default:
        set_error( STATUS_INVALID_PARAMETER );
        return;
    }

    if (sock->state != SOCK_CONNECTED)
    {
        set_error( STATUS_PIPE_DISCONNECTED );
        return;
    }

    queue_async( queue, async );
    sock_reselect( sock ); /* the new async may change the poll event mask */

    set_error( STATUS_PENDING );
}

1362 1363 1364
/* fd callback invoked when an async queue changes; performs any deferred
 * write-side shutdown and recomputes the socket's poll event mask. */
static void sock_reselect_async( struct fd *fd, struct async_queue *queue )
{
    struct sock *sock = get_fd_user( fd );

    /* a shutdown requested while writes were pending is performed once the
     * write queue drains */
    if (sock->wr_shutdown_pending && list_empty( &sock->write_q.queue ))
    {
        shutdown( get_unix_fd( sock->fd ), SHUT_WR );
        sock->wr_shutdown_pending = 0;
    }

    /* Don't reselect the ifchange queue; we always ask for POLLIN.
     * Don't reselect an uninitialized socket; we can't call set_fd_events() on
     * a pseudo-fd. */
    if (queue != &sock->ifchange_q && sock->type)
        sock_reselect( sock );
}

1379 1380 1381 1382 1383 1384
/* object-ops callback: return a new reference to the socket's fd object */
static struct fd *sock_get_fd( struct object *obj )
{
    struct sock *s = (struct sock *)obj;
    struct fd *ret = (struct fd *)grab_object( s->fd );
    return ret;
}

1385
/* object-ops callback invoked when a handle to the socket is closed.
 * On the last handle, cancel all outstanding accepts/connects and complete
 * any pending poll requests on this socket with AFD_POLL_CLOSE.
 * Always returns 1 to allow the close. */
static int sock_close_handle( struct object *obj, struct process *process, obj_handle_t handle )
{
    struct sock *sock = (struct sock *)obj;

    if (sock->obj.handle_count == 1) /* last handle */
    {
        struct accept_req *accept_req, *accept_next;
        struct poll_req *poll_req, *poll_next;

        if (sock->accept_recv_req)
            async_terminate( sock->accept_recv_req->async, STATUS_CANCELLED );

        /* SAFE iteration: async_terminate() may unlink the request */
        LIST_FOR_EACH_ENTRY_SAFE( accept_req, accept_next, &sock->accept_list, struct accept_req, entry )
            async_terminate( accept_req->async, STATUS_CANCELLED );

        if (sock->connect_req)
            async_terminate( sock->connect_req->async, STATUS_CANCELLED );

        LIST_FOR_EACH_ENTRY_SAFE( poll_req, poll_next, &poll_list, struct poll_req, entry )
        {
            struct iosb *iosb = poll_req->iosb;
            BOOL signaled = FALSE;
            unsigned int i;

            if (iosb->status != STATUS_PENDING) continue;

            /* mark every entry of the request that refers to this socket */
            for (i = 0; i < poll_req->count; ++i)
            {
                if (poll_req->sockets[i].sock == sock)
                {
                    signaled = TRUE;
                    poll_req->sockets[i].flags = AFD_POLL_CLOSE;
                    poll_req->sockets[i].status = 0;
                }
            }

            if (signaled) complete_async_poll( poll_req, STATUS_SUCCESS );
        }
    }

    return 1;
}

/* object-ops callback: release all resources held by the socket object */
static void sock_destroy( struct object *obj )
{
    struct sock *sock = (struct sock *)obj;

    assert( obj->ops == &sock_ops );

    /* FIXME: special socket shutdown stuff? */

    if ( sock->deferred )
        release_object( sock->deferred );

    /* fail any interface-change notifications before tearing down the queue */
    async_wake_up( &sock->ifchange_q, STATUS_CANCELLED );
    sock_release_ifchange( sock );
    free_async_queue( &sock->read_q );
    free_async_queue( &sock->write_q );
    free_async_queue( &sock->ifchange_q );
    free_async_queue( &sock->accept_q );
    free_async_queue( &sock->connect_q );
    free_async_queue( &sock->poll_q );
    if (sock->event) release_object( sock->event );
    if (sock->fd)
    {
        /* shut the socket down to force pending poll() calls in the client to return */
        shutdown( get_unix_fd(sock->fd), SHUT_RDWR );
        release_object( sock->fd );
    }
}

1456
/* Allocate and zero-initialize a new sock object. The socket has no fd yet
 * (sock->type == 0 marks it uninitialized until init_socket() runs).
 * Returns NULL on allocation failure. */
static struct sock *create_socket(void)
{
    struct sock *sock;

    if (!(sock = alloc_object( &sock_ops ))) return NULL;
    sock->fd      = NULL;
    sock->state   = SOCK_UNCONNECTED;
    sock->mask    = 0;
    sock->pending_events = 0;
    sock->reported_events = 0;
    sock->flags   = 0;
    sock->proto   = 0;
    sock->type    = 0;
    sock->family  = 0;
    sock->event   = NULL;
    sock->window  = 0;
    sock->message = 0;
    sock->wparam  = 0;
    sock->connect_time = 0;
    sock->deferred = NULL;
    sock->ifchange_obj = NULL;
    sock->accept_recv_req = NULL;
    sock->connect_req = NULL;
    sock->main_poll = NULL;
    memset( &sock->addr, 0, sizeof(sock->addr) );
    sock->addr_len = 0;
    sock->rd_shutdown = 0;
    sock->wr_shutdown = 0;
    sock->wr_shutdown_pending = 0;
    sock->hangup = 0;
    sock->aborted = 0;
    sock->nonblocking = 0;
    sock->bound = 0;
    sock->rcvbuf = 0;
    sock->sndbuf = 0;
    sock->rcvtimeo = 0;
    sock->sndtimeo = 0;
    init_async_queue( &sock->read_q );
    init_async_queue( &sock->write_q );
    init_async_queue( &sock->ifchange_q );
    init_async_queue( &sock->accept_q );
    init_async_queue( &sock->connect_q );
    init_async_queue( &sock->poll_q );
    memset( sock->errors, 0, sizeof(sock->errors) );
    list_init( &sock->accept_list );
    return sock;
}

1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541
/* Map a Windows (WS_AF_*) address family to the native AF_* value,
 * or -1 if the family is not supported on this platform. */
static int get_unix_family( int family )
{
    if (family == WS_AF_INET) return AF_INET;
    if (family == WS_AF_INET6) return AF_INET6;
#ifdef HAS_IPX
    if (family == WS_AF_IPX) return AF_IPX;
#endif
#ifdef AF_IRDA
    if (family == WS_AF_IRDA) return AF_IRDA;
#endif
    if (family == WS_AF_UNSPEC) return AF_UNSPEC;
    return -1;
}

/* Map a Windows (WS_SOCK_*) socket type to the native SOCK_* value,
 * or -1 if the type is not supported. */
static int get_unix_type( int type )
{
    if (type == WS_SOCK_DGRAM) return SOCK_DGRAM;
    if (type == WS_SOCK_RAW) return SOCK_RAW;
    if (type == WS_SOCK_STREAM) return SOCK_STREAM;
    return -1;
}

static int get_unix_protocol( int protocol )
{
    if (protocol >= WS_NSPROTO_IPX && protocol <= WS_NSPROTO_IPX + 255)
        return protocol;

    switch (protocol)
    {
        case WS_IPPROTO_ICMP: return IPPROTO_ICMP;
        case WS_IPPROTO_IGMP: return IPPROTO_IGMP;
        case WS_IPPROTO_IP: return IPPROTO_IP;
1542
        case WS_IPPROTO_IPV4: return IPPROTO_IPIP;
1543 1544 1545 1546 1547 1548 1549 1550
        case WS_IPPROTO_IPV6: return IPPROTO_IPV6;
        case WS_IPPROTO_RAW: return IPPROTO_RAW;
        case WS_IPPROTO_TCP: return IPPROTO_TCP;
        case WS_IPPROTO_UDP: return IPPROTO_UDP;
        default: return -1;
    }
}

1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580
/* Enable or disable the don't-fragment bit on the socket, using whichever
 * per-platform socket option is available for the given IP level
 * (IPPROTO_IP or IPPROTO_IPV6). Silently does nothing if the platform has
 * no suitable option; setsockopt() failure is deliberately ignored. */
static void set_dont_fragment( int fd, int level, int value )
{
    int optname;

    if (level == IPPROTO_IP)
    {
#ifdef IP_DONTFRAG
        optname = IP_DONTFRAG;
#elif defined(IP_MTU_DISCOVER) && defined(IP_PMTUDISC_DO) && defined(IP_PMTUDISC_DONT)
        /* Linux spelling: map the boolean onto PMTU discovery modes */
        optname = IP_MTU_DISCOVER;
        value = value ? IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
#else
        return;
#endif
    }
    else
    {
#ifdef IPV6_DONTFRAG
        optname = IPV6_DONTFRAG;
#elif defined(IPV6_MTU_DISCOVER) && defined(IPV6_PMTUDISC_DO) && defined(IPV6_PMTUDISC_DONT)
        optname = IPV6_MTU_DISCOVER;
        value = value ? IPV6_PMTUDISC_DO : IPV6_PMTUDISC_DONT;
#else
        return;
#endif
    }

    setsockopt( fd, level, optname, &value, sizeof(value) );
}

1581
/* Create the underlying Unix socket for a sock object and initialize its
 * server-side state from the requested Windows family/type/protocol/flags.
 * Replaces any existing fd on the sock (preserving its fd options).
 * Returns 0 on success, -1 on failure with the Win32 error set. */
static int init_socket( struct sock *sock, int family, int type, int protocol, unsigned int flags )
{
    unsigned int options = 0;
    int sockfd, unix_type, unix_family, unix_protocol, value;
    socklen_t len;

    unix_family = get_unix_family( family );
    unix_type = get_unix_type( type );
    unix_protocol = get_unix_protocol( protocol );

    if (unix_protocol < 0)
    {
        if (type && unix_type < 0)
            set_win32_error( WSAESOCKTNOSUPPORT );
        else
            set_win32_error( WSAEPROTONOSUPPORT );
        return -1;
    }
    if (unix_family < 0)
    {
        if (family >= 0 && unix_type < 0)
            set_win32_error( WSAESOCKTNOSUPPORT );
        else
            set_win32_error( WSAEAFNOSUPPORT );
        return -1;
    }

    sockfd = socket( unix_family, unix_type, unix_protocol );
    if (sockfd == -1)
    {
        /* EINVAL means the type/protocol combination is unsupported */
        if (errno == EINVAL) set_win32_error( WSAESOCKTNOSUPPORT );
        else set_win32_error( sock_get_error( errno ));
        return -1;
    }
    fcntl(sockfd, F_SETFL, O_NONBLOCK); /* make socket nonblocking */

    if (family == WS_AF_IPX && protocol >= WS_NSPROTO_IPX && protocol <= WS_NSPROTO_IPX + 255)
    {
#ifdef HAS_IPX
        int ipx_type = protocol - WS_NSPROTO_IPX;

#ifdef SOL_IPX
        setsockopt( sockfd, SOL_IPX, IPX_TYPE, &ipx_type, sizeof(ipx_type) );
#else
        struct ipx val;
        /* Should we retrieve val using a getsockopt call and then
         * set the modified one? */
        val.ipx_pt = ipx_type;
        setsockopt( sockfd, 0, SO_DEFAULT_HEADERS, &val, sizeof(val) );
#endif
#endif
    }

    if (unix_family == AF_INET || unix_family == AF_INET6)
    {
        /* ensure IP_DONTFRAGMENT is disabled for SOCK_DGRAM and SOCK_RAW, enabled for SOCK_STREAM */
        if (unix_type == SOCK_DGRAM || unix_type == SOCK_RAW) /* in Linux the global default can be enabled */
            set_dont_fragment( sockfd, unix_family == AF_INET6 ? IPPROTO_IPV6 : IPPROTO_IP, FALSE );
        else if (unix_type == SOCK_STREAM)
            set_dont_fragment( sockfd, unix_family == AF_INET6 ? IPPROTO_IPV6 : IPPROTO_IP, TRUE );
    }

#ifdef IPV6_V6ONLY
    if (unix_family == AF_INET6)
    {
        static const int enable = 1;
        setsockopt( sockfd, IPPROTO_IPV6, IPV6_V6ONLY, &enable, sizeof(enable) );
    }
#endif

    /* cache the kernel's buffer sizes for later SO_RCVBUF/SO_SNDBUF queries */
    len = sizeof(value);
    if (!getsockopt( sockfd, SOL_SOCKET, SO_RCVBUF, &value, &len ))
        sock->rcvbuf = value;

    len = sizeof(value);
    if (!getsockopt( sockfd, SOL_SOCKET, SO_SNDBUF, &value, &len ))
        sock->sndbuf = value;

    sock->state  = (type == WS_SOCK_STREAM ? SOCK_UNCONNECTED : SOCK_CONNECTIONLESS);
    sock->flags  = flags;
    sock->proto  = protocol;
    sock->type   = type;
    sock->family = family;

    /* when re-initializing, keep the old fd's options and drop the old fd */
    if (sock->fd)
    {
        options = get_fd_options( sock->fd );
        release_object( sock->fd );
    }

    if (!(sock->fd = create_anonymous_fd( &sock_fd_ops, sockfd, &sock->obj, options )))
    {
        return -1;
    }

    /* We can't immediately allow caching for a connection-mode socket, since it
     * might be accepted into (changing the underlying fd object.) */
    if (sock->type != WS_SOCK_STREAM) allow_fd_caching( sock->fd );

    return 0;
}

1683 1684 1685 1686 1687 1688 1689 1690 1691
/* accepts a socket and inits it */
static int accept_new_fd( struct sock *sock )
{

    /* Try to accept(2). We can't be safe that this an already connected socket
     * or that accept() is allowed on it. In those cases we will get -1/errno
     * return.
     */
    struct sockaddr saddr;
1692
    socklen_t slen = sizeof(saddr);
1693 1694 1695 1696
    int acceptfd = accept( get_unix_fd(sock->fd), &saddr, &slen );
    if (acceptfd != -1)
        fcntl( acceptfd, F_SETFL, O_NONBLOCK );
    else
1697
        set_error( sock_get_ntstatus( errno ));
1698 1699 1700
    return acceptfd;
}

1701
/* accept a socket (creates a new fd) */
/* Returns a new sock object for the accepted connection, or the previously
 * deferred one if present. Returns NULL on failure with the error set. */
static struct sock *accept_socket( struct sock *sock )
{
    struct sock *acceptsock;
    int	acceptfd;

    if (get_unix_fd( sock->fd ) == -1) return NULL;

    if ( sock->deferred )
    {
        /* hand out the connection that was previously deferred */
        acceptsock = sock->deferred;
        sock->deferred = NULL;
    }
    else
    {
        union unix_sockaddr unix_addr;
        socklen_t unix_len;

        if ((acceptfd = accept_new_fd( sock )) == -1) return NULL;
        if (!(acceptsock = create_socket()))
        {
            close( acceptfd );
            return NULL;
        }

        /* newly created socket gets the same properties of the listening socket */
        acceptsock->state   = SOCK_CONNECTED;
        acceptsock->bound   = 1;
        acceptsock->nonblocking = sock->nonblocking;
        acceptsock->mask    = sock->mask;
        acceptsock->proto   = sock->proto;
        acceptsock->type    = sock->type;
        acceptsock->family  = sock->family;
        acceptsock->window  = sock->window;
        acceptsock->message = sock->message;
        acceptsock->connect_time = current_time;
        if (sock->event) acceptsock->event = (struct event *)grab_object( sock->event );
        acceptsock->flags = sock->flags;
        if (!(acceptsock->fd = create_anonymous_fd( &sock_fd_ops, acceptfd, &acceptsock->obj,
                                                    get_fd_options( sock->fd ) )))
        {
            release_object( acceptsock );
            return NULL;
        }
        /* record the accepted socket's local address */
        unix_len = sizeof(unix_addr);
        if (!getsockname( acceptfd, &unix_addr.addr, &unix_len ))
            acceptsock->addr_len = sockaddr_from_unix( &unix_addr, &acceptsock->addr.addr, sizeof(acceptsock->addr) );
    }
    clear_error();
    /* the accept has been consumed; allow the event to be reported again */
    sock->pending_events &= ~AFD_POLL_ACCEPT;
    sock->reported_events &= ~AFD_POLL_ACCEPT;
    sock_reselect( sock );
    return acceptsock;
}

1756 1757
/* Accept a connection from "sock" directly into the existing sock object
 * "acceptsock" (AcceptEx-style), replacing its underlying fd.
 * Returns TRUE on success, FALSE on failure with the error set. */
static int accept_into_socket( struct sock *sock, struct sock *acceptsock )
{
    union unix_sockaddr unix_addr;
    socklen_t unix_len;
    int acceptfd;
    struct fd *newfd;

    if (get_unix_fd( sock->fd ) == -1) return FALSE;

    if ( sock->deferred )
    {
        /* reuse the deferred connection's fd rather than accepting a new one */
        newfd = dup_fd_object( sock->deferred->fd, 0, 0,
                               get_fd_options( acceptsock->fd ) );
        if ( !newfd )
            return FALSE;

        set_fd_user( newfd, &sock_fd_ops, &acceptsock->obj );

        release_object( sock->deferred );
        sock->deferred = NULL;
    }
    else
    {
        if ((acceptfd = accept_new_fd( sock )) == -1)
            return FALSE;

        if (!(newfd = create_anonymous_fd( &sock_fd_ops, acceptfd, &acceptsock->obj,
                                            get_fd_options( acceptsock->fd ) )))
            return FALSE;
    }

    /* reset acceptsock's state for the new connection; it inherits the
     * listening socket's protocol parameters */
    acceptsock->state = SOCK_CONNECTED;
    acceptsock->pending_events = 0;
    acceptsock->reported_events = 0;
    acceptsock->proto   = sock->proto;
    acceptsock->type    = sock->type;
    acceptsock->family  = sock->family;
    acceptsock->wparam  = 0;
    acceptsock->deferred = NULL;
    acceptsock->connect_time = current_time;
    fd_copy_completion( acceptsock->fd, newfd );
    release_object( acceptsock->fd );
    acceptsock->fd = newfd;

    /* record the accepted socket's local address */
    unix_len = sizeof(unix_addr);
    if (!getsockname( get_unix_fd( newfd ), &unix_addr.addr, &unix_len ))
        acceptsock->addr_len = sockaddr_from_unix( &unix_addr, &acceptsock->addr.addr, sizeof(acceptsock->addr) );

    clear_error();
    /* the accept has been consumed; allow the event to be reported again */
    sock->pending_events &= ~AFD_POLL_ACCEPT;
    sock->reported_events &= ~AFD_POLL_ACCEPT;
    sock_reselect( sock );

    return TRUE;
}

1812 1813
#ifdef IP_BOUND_IF

1814
/* Bind the socket to a specific network interface using IP_BOUND_IF
 * (macOS/BSD variant). "bind_addr" is unused here; it is needed only by the
 * Linux BPF-filter fallback implementation of this function.
 * Returns 0 on success, -1 on failure (errno set by the failing call). */
static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
{
    static const int enable = 1;
    unsigned int index;

    if (!(index = if_nametoindex( name )))
        return -1;

    if (setsockopt( fd, IPPROTO_IP, IP_BOUND_IF, &index, sizeof(index) ))
        return -1;

    /* allow other sockets to bind the same address on other interfaces */
    return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable) );
}

1828
#elif defined(IP_UNICAST_IF) && defined(SO_ATTACH_FILTER) && defined(SO_BINDTODEVICE)
1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860

/* Layout of the classic-BPF program attached with SO_ATTACH_FILTER below:
 * two load/compare rule pairs (interface index, then destination IP)
 * followed by the shared "keep" and "dump" return instructions. The
 * 0xdeadbeef placeholders in the generic template are patched per socket
 * before attachment. */
struct interface_filter
{
    struct sock_filter iface_memaddr;
    struct sock_filter iface_rule;
    struct sock_filter ip_memaddr;
    struct sock_filter ip_rule;
    struct sock_filter return_keep;
    struct sock_filter return_dump;
};
/* relative jump offsets, in BPF instructions, from rule "here" to the return stubs */
# define FILTER_JUMP_DUMP(here)  (u_char)(offsetof(struct interface_filter, return_dump) \
                                 -offsetof(struct interface_filter, here)-sizeof(struct sock_filter)) \
                                 /sizeof(struct sock_filter)
# define FILTER_JUMP_KEEP(here)  (u_char)(offsetof(struct interface_filter, return_keep) \
                                 -offsetof(struct interface_filter, here)-sizeof(struct sock_filter)) \
                                 /sizeof(struct sock_filter)
# define FILTER_JUMP_NEXT()      (u_char)(0)
# define SKF_NET_DESTIP          16 /* offset in the network header to the destination IP */
static struct interface_filter generic_interface_filter =
{
    /* This filter rule allows incoming packets on the specified interface, which works for all
     * remotely generated packets and for locally generated broadcast packets. */
    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, SKF_AD_OFF+SKF_AD_IFINDEX),
    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0xdeadbeef, FILTER_JUMP_KEEP(iface_rule), FILTER_JUMP_NEXT()),
    /* This rule allows locally generated packets targeted at the specific IP address of the chosen
     * adapter (local packets not destined for the broadcast address do not have IFINDEX set) */
    BPF_STMT(BPF_LD+BPF_W+BPF_ABS, SKF_NET_OFF+SKF_NET_DESTIP),
    BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0xdeadbeef, FILTER_JUMP_KEEP(ip_rule), FILTER_JUMP_DUMP(ip_rule)),
    BPF_STMT(BPF_RET+BPF_K, (u_int)-1), /* keep packet */
    BPF_STMT(BPF_RET+BPF_K, 0)          /* dump packet */
};

static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
1862 1863 1864
{
    struct interface_filter specific_interface_filter;
    struct sock_fprog filter_prog;
1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
    static const int enable = 1;
    unsigned int index;
    in_addr_t ifindex;

    if (!setsockopt( fd, SOL_SOCKET, SO_BINDTODEVICE, name, strlen( name ) + 1 ))
        return 0;

    /* SO_BINDTODEVICE requires NET_CAP_RAW until Linux 5.7. */
    if (debug_level)
        fprintf( stderr, "setsockopt SO_BINDTODEVICE fd %d, name %s failed: %s, falling back to SO_REUSE_ADDR\n",
                 fd, name, strerror( errno ));

    if (!(index = if_nametoindex( name )))
        return -1;
1879

1880 1881 1882
    ifindex = htonl( index );
    if (setsockopt( fd, IPPROTO_IP, IP_UNICAST_IF, &ifindex, sizeof(ifindex) ) < 0)
        return -1;
1883 1884 1885 1886 1887 1888

    specific_interface_filter = generic_interface_filter;
    specific_interface_filter.iface_rule.k = index;
    specific_interface_filter.ip_rule.k = htonl( bind_addr );
    filter_prog.len = sizeof(generic_interface_filter) / sizeof(struct sock_filter);
    filter_prog.filter = (struct sock_filter *)&specific_interface_filter;
1889 1890 1891 1892
    if (setsockopt( fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter_prog, sizeof(filter_prog) ))
        return -1;

    return setsockopt( fd, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable) );
1893 1894 1895 1896
}

#else

/* Fallback when no interface-binding mechanism is available on this
 * platform: always fail with EOPNOTSUPP. */
static int bind_to_iface_name( int fd, in_addr_t bind_addr, const char *name )
{
    errno = EOPNOTSUPP;
    return -1;
}

#endif /* LINUX_BOUND_IF */

/* Take bind() calls on any name corresponding to a local network adapter and
 * restrict the given socket to operating only on the specified interface. This
 * restriction consists of two components:
 *  1) An outgoing packet restriction suggesting the egress interface for all
 *     packets.
 *  2) An incoming packet restriction dropping packets not meant for the
 *     interface.
 * If the function succeeds in placing these restrictions, then the name for the
 * bind() may safely be changed to INADDR_ANY, permitting the transmission and
 * receipt of broadcast packets on the socket. This behavior is only relevant to
 * UDP sockets and is needed for applications that expect to be able to receive
 * broadcast packets on a socket that is bound to a specific network interface.
 */
static int bind_to_interface( struct sock *sock, const struct sockaddr_in *addr )
{
    in_addr_t bind_addr = addr->sin_addr.s_addr;
    struct ifaddrs *ifaddrs, *ifaddr;
    int fd = get_unix_fd( sock->fd );
1923
    int err = 0;
1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936

    if (bind_addr == htonl( INADDR_ANY ) || bind_addr == htonl( INADDR_LOOPBACK ))
        return 0;
    if (sock->type != WS_SOCK_DGRAM)
        return 0;

    if (getifaddrs( &ifaddrs ) < 0) return 0;

    for (ifaddr = ifaddrs; ifaddr != NULL; ifaddr = ifaddr->ifa_next)
    {
        if (ifaddr->ifa_addr && ifaddr->ifa_addr->sa_family == AF_INET
                && ((struct sockaddr_in *)ifaddr->ifa_addr)->sin_addr.s_addr == bind_addr)
        {
1937
            if ((err = bind_to_iface_name( fd, bind_addr, ifaddr->ifa_name )) < 0)
1938 1939 1940 1941
            {
                if (debug_level)
                    fprintf( stderr, "failed to bind to interface: %s\n", strerror( errno ) );
            }
1942
            break;
1943 1944
        }
    }
1945
    freeifaddrs( ifaddrs );
1946
    return !err;
1947 1948
}

#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
/* Return the index of the local interface owning the given IPv6 address,
 * or 0 if it cannot be determined. Used to fill in a default scope id. */
static unsigned int get_ipv6_interface_index( const struct in6_addr *addr )
{
    struct ifaddrs *ifaddrs, *cur;
    unsigned int ret = 0;

    if (getifaddrs( &ifaddrs ) < 0) return 0;

    for (cur = ifaddrs; cur; cur = cur->ifa_next)
    {
        unsigned int index;

        if (!cur->ifa_addr || cur->ifa_addr->sa_family != AF_INET6)
            continue;
        if (memcmp( &((struct sockaddr_in6 *)cur->ifa_addr)->sin6_addr, addr, sizeof(*addr) ))
            continue;

        if (!(index = if_nametoindex( cur->ifa_name )))
        {
            if (debug_level)
                fprintf( stderr, "Unable to look up interface index for %s: %s\n",
                         cur->ifa_name, strerror( errno ) );
            continue;
        }

        ret = index;
        break;
    }

    freeifaddrs( ifaddrs );
    return ret;
}
#endif

/* return an errno value mapped to a WSA error */
static unsigned int sock_get_error( int err )
1983
{
1984
    switch (err)
1985
    {
1986 1987
        case EINTR:             return WSAEINTR;
        case EBADF:             return WSAEBADF;
1988
        case EPERM:
1989 1990 1991 1992
        case EACCES:            return WSAEACCES;
        case EFAULT:            return WSAEFAULT;
        case EINVAL:            return WSAEINVAL;
        case EMFILE:            return WSAEMFILE;
1993
        case EINPROGRESS:
1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
        case EWOULDBLOCK:       return WSAEWOULDBLOCK;
        case EALREADY:          return WSAEALREADY;
        case ENOTSOCK:          return WSAENOTSOCK;
        case EDESTADDRREQ:      return WSAEDESTADDRREQ;
        case EMSGSIZE:          return WSAEMSGSIZE;
        case EPROTOTYPE:        return WSAEPROTOTYPE;
        case ENOPROTOOPT:       return WSAENOPROTOOPT;
        case EPROTONOSUPPORT:   return WSAEPROTONOSUPPORT;
        case ESOCKTNOSUPPORT:   return WSAESOCKTNOSUPPORT;
        case EOPNOTSUPP:        return WSAEOPNOTSUPP;
        case EPFNOSUPPORT:      return WSAEPFNOSUPPORT;
        case EAFNOSUPPORT:      return WSAEAFNOSUPPORT;
        case EADDRINUSE:        return WSAEADDRINUSE;
        case EADDRNOTAVAIL:     return WSAEADDRNOTAVAIL;
        case ENETDOWN:          return WSAENETDOWN;
        case ENETUNREACH:       return WSAENETUNREACH;
        case ENETRESET:         return WSAENETRESET;
        case ECONNABORTED:      return WSAECONNABORTED;
2012
        case EPIPE:
2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
        case ECONNRESET:        return WSAECONNRESET;
        case ENOBUFS:           return WSAENOBUFS;
        case EISCONN:           return WSAEISCONN;
        case ENOTCONN:          return WSAENOTCONN;
        case ESHUTDOWN:         return WSAESHUTDOWN;
        case ETOOMANYREFS:      return WSAETOOMANYREFS;
        case ETIMEDOUT:         return WSAETIMEDOUT;
        case ECONNREFUSED:      return WSAECONNREFUSED;
        case ELOOP:             return WSAELOOP;
        case ENAMETOOLONG:      return WSAENAMETOOLONG;
        case EHOSTDOWN:         return WSAEHOSTDOWN;
        case EHOSTUNREACH:      return WSAEHOSTUNREACH;
        case ENOTEMPTY:         return WSAENOTEMPTY;
2026
#ifdef EPROCLIM
2027
        case EPROCLIM:          return WSAEPROCLIM;
2028 2029
#endif
#ifdef EUSERS
2030
        case EUSERS:            return WSAEUSERS;
2031 2032
#endif
#ifdef EDQUOT
2033
        case EDQUOT:            return WSAEDQUOT;
2034 2035
#endif
#ifdef ESTALE
2036
        case ESTALE:            return WSAESTALE;
2037 2038
#endif
#ifdef EREMOTE
2039
        case EREMOTE:           return WSAEREMOTE;
2040
#endif
2041 2042

        case 0:                 return 0;
2043 2044 2045 2046
        default:
            errno = err;
            perror("wineserver: sock_get_error() can't map error");
            return WSAEFAULT;
2047 2048 2049
    }
}

/* Map a Unix errno value to an NTSTATUS code.
 * Unknown values are reported via perror() and mapped to STATUS_UNSUCCESSFUL. */
static int sock_get_ntstatus( int err )
{
    static const struct
    {
        int unix_err;
        int status;
    }
    map[] =
    {
        { EBADF,            STATUS_INVALID_HANDLE },
        { EBUSY,            STATUS_DEVICE_BUSY },
        { EPERM,            STATUS_ACCESS_DENIED },
        { EACCES,           STATUS_ACCESS_DENIED },
        { EFAULT,           STATUS_ACCESS_VIOLATION },
        { EINVAL,           STATUS_INVALID_PARAMETER },
        { ENFILE,           STATUS_TOO_MANY_OPENED_FILES },
        { EMFILE,           STATUS_TOO_MANY_OPENED_FILES },
        { EINPROGRESS,      STATUS_DEVICE_NOT_READY },
        { EWOULDBLOCK,      STATUS_DEVICE_NOT_READY },
        { EALREADY,         STATUS_NETWORK_BUSY },
        { ENOTSOCK,         STATUS_OBJECT_TYPE_MISMATCH },
        { EDESTADDRREQ,     STATUS_INVALID_PARAMETER },
        { EMSGSIZE,         STATUS_BUFFER_OVERFLOW },
        { EPROTONOSUPPORT,  STATUS_NOT_SUPPORTED },
        { ESOCKTNOSUPPORT,  STATUS_NOT_SUPPORTED },
        { EPFNOSUPPORT,     STATUS_NOT_SUPPORTED },
        { EAFNOSUPPORT,     STATUS_NOT_SUPPORTED },
        { EPROTOTYPE,       STATUS_NOT_SUPPORTED },
        { ENOPROTOOPT,      STATUS_INVALID_PARAMETER },
        { EOPNOTSUPP,       STATUS_NOT_SUPPORTED },
        { EADDRINUSE,       STATUS_SHARING_VIOLATION },
        /* Linux returns ENODEV when specifying an invalid sin6_scope_id;
         * Windows returns STATUS_INVALID_ADDRESS_COMPONENT */
        { ENODEV,           STATUS_INVALID_ADDRESS_COMPONENT },
        { EADDRNOTAVAIL,    STATUS_INVALID_ADDRESS_COMPONENT },
        { ECONNREFUSED,     STATUS_CONNECTION_REFUSED },
        { ESHUTDOWN,        STATUS_PIPE_DISCONNECTED },
        { ENOTCONN,         STATUS_INVALID_CONNECTION },
        { ETIMEDOUT,        STATUS_IO_TIMEOUT },
        { ENETUNREACH,      STATUS_NETWORK_UNREACHABLE },
        { EHOSTUNREACH,     STATUS_HOST_UNREACHABLE },
        { ENETDOWN,         STATUS_NETWORK_BUSY },
        { EPIPE,            STATUS_CONNECTION_RESET },
        { ECONNRESET,       STATUS_CONNECTION_RESET },
        { ECONNABORTED,     STATUS_CONNECTION_ABORTED },
        { EISCONN,          STATUS_CONNECTION_ACTIVE },
    };
    unsigned int i;

    if (!err) return STATUS_SUCCESS;

    for (i = 0; i < sizeof(map) / sizeof(map[0]); ++i)
        if (map[i].unix_err == err) return map[i].status;

    errno = err;
    perror("wineserver: sock_get_ntstatus() can't map error");
    return STATUS_UNSUCCESSFUL;
}

static struct accept_req *alloc_accept_req( struct sock *sock, struct sock *acceptsock, struct async *async,
2101 2102 2103 2104 2105 2106 2107
                                            const struct afd_accept_into_params *params )
{
    struct accept_req *req = mem_alloc( sizeof(*req) );

    if (req)
    {
        req->async = (struct async *)grab_object( async );
2108
        req->iosb = async_get_iosb( async );
2109
        req->sock = (struct sock *)grab_object( sock );
2110
        req->acceptsock = acceptsock;
2111
        if (acceptsock) grab_object( acceptsock );
2112 2113 2114
        req->accepted = 0;
        req->recv_len = 0;
        req->local_len = 0;
2115 2116 2117 2118 2119
        if (params)
        {
            req->recv_len = params->recv_len;
            req->local_len = params->local_len;
        }
2120 2121 2122 2123
    }
    return req;
}

static void sock_ioctl( struct fd *fd, ioctl_code_t code, struct async *async )
2125 2126
{
    struct sock *sock = get_fd_user( fd );
2127
    int unix_fd;
2128 2129 2130

    assert( sock->obj.ops == &sock_ops );

2131
    if (code != IOCTL_AFD_WINE_CREATE && (unix_fd = get_unix_fd( fd )) < 0) return;
2132 2133 2134

    switch(code)
    {
2135
    case IOCTL_AFD_WINE_CREATE:
2136 2137 2138 2139 2140 2141
    {
        const struct afd_create_params *params = get_req_data();

        if (get_req_data_size() != sizeof(*params))
        {
            set_error( STATUS_INVALID_PARAMETER );
2142
            return;
2143 2144
        }
        init_socket( sock, params->family, params->type, params->protocol, params->flags );
2145
        return;
2146 2147
    }

2148
    case IOCTL_AFD_WINE_ACCEPT:
2149 2150 2151 2152 2153 2154 2155
    {
        struct sock *acceptsock;
        obj_handle_t handle;

        if (get_reply_max_size() != sizeof(handle))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2156
            return;
2157 2158
        }

2159 2160 2161 2162
        if (!(acceptsock = accept_socket( sock )))
        {
            struct accept_req *req;

2163 2164
            if (sock->nonblocking) return;
            if (get_error() != STATUS_DEVICE_NOT_READY) return;
2165

2166
            if (!(req = alloc_accept_req( sock, NULL, async, NULL ))) return;
2167 2168
            list_add_tail( &sock->accept_list, &req->entry );

2169
            async_set_completion_callback( async, free_accept_req, req );
2170 2171 2172
            queue_async( &sock->accept_q, async );
            sock_reselect( sock );
            set_error( STATUS_PENDING );
2173
            return;
2174
        }
2175 2176 2177
        handle = alloc_handle( current->process, &acceptsock->obj,
                               GENERIC_READ | GENERIC_WRITE | SYNCHRONIZE, OBJ_INHERIT );
        acceptsock->wparam = handle;
2178
        sock_reselect( acceptsock );
2179 2180
        release_object( acceptsock );
        set_reply_data( &handle, sizeof(handle) );
2181
        return;
2182 2183
    }

2184
    case IOCTL_AFD_WINE_ACCEPT_INTO:
2185 2186
    {
        static const int access = FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES | FILE_READ_DATA;
2187
        const struct afd_accept_into_params *params = get_req_data();
2188
        struct sock *acceptsock;
2189 2190
        unsigned int remote_len;
        struct accept_req *req;
2191

2192 2193 2194
        if (get_req_data_size() != sizeof(*params) ||
            get_reply_max_size() < params->recv_len ||
            get_reply_max_size() - params->recv_len < params->local_len)
2195 2196
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2197
            return;
2198 2199
        }

2200 2201 2202 2203
        remote_len = get_reply_max_size() - params->recv_len - params->local_len;
        if (remote_len < sizeof(int))
        {
            set_error( STATUS_INVALID_PARAMETER );
2204
            return;
2205 2206 2207
        }

        if (!(acceptsock = (struct sock *)get_handle_obj( current->process, params->accept_handle, access, &sock_ops )))
2208
            return;
2209

2210 2211 2212
        if (acceptsock->accept_recv_req)
        {
            release_object( acceptsock );
2213
            set_error( STATUS_INVALID_PARAMETER );
2214
            return;
2215 2216
        }

2217
        if (!(req = alloc_accept_req( sock, acceptsock, async, params )))
2218 2219
        {
            release_object( acceptsock );
2220
            return;
2221
        }
2222 2223
        list_add_tail( &sock->accept_list, &req->entry );
        acceptsock->accept_recv_req = req;
2224
        release_object( acceptsock );
2225 2226

        acceptsock->wparam = params->accept_handle;
2227
        async_set_completion_callback( async, free_accept_req, req );
2228 2229 2230
        queue_async( &sock->accept_q, async );
        sock_reselect( sock );
        set_error( STATUS_PENDING );
2231
        return;
2232 2233
    }

2234 2235 2236 2237 2238 2239 2240
    case IOCTL_AFD_LISTEN:
    {
        const struct afd_listen_params *params = get_req_data();

        if (get_req_data_size() < sizeof(*params))
        {
            set_error( STATUS_INVALID_PARAMETER );
2241
            return;
2242 2243
        }

2244 2245 2246
        if (!sock->bound)
        {
            set_error( STATUS_INVALID_PARAMETER );
2247
            return;
2248 2249
        }

2250 2251 2252
        if (listen( unix_fd, params->backlog ) < 0)
        {
            set_error( sock_get_ntstatus( errno ) );
2253
            return;
2254 2255
        }

2256
        sock->state = SOCK_LISTENING;
2257

2258 2259 2260
        /* a listening socket can no longer be accepted into */
        allow_fd_caching( sock->fd );

2261
        /* we may already be selecting for AFD_POLL_ACCEPT */
2262
        sock_reselect( sock );
2263
        return;
2264 2265
    }

2266 2267 2268
    case IOCTL_AFD_WINE_CONNECT:
    {
        const struct afd_connect_params *params = get_req_data();
2269
        const struct WS_sockaddr *addr;
2270
        union unix_sockaddr unix_addr;
2271
        struct connect_req *req;
2272
        socklen_t unix_len;
2273 2274 2275 2276 2277 2278
        int send_len, ret;

        if (get_req_data_size() < sizeof(*params) ||
            get_req_data_size() - sizeof(*params) < params->addr_len)
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2279
            return;
2280 2281
        }
        send_len = get_req_data_size() - sizeof(*params) - params->addr_len;
2282
        addr = (const struct WS_sockaddr *)(params + 1);
2283

2284 2285 2286
        if (!params->synchronous && !sock->bound)
        {
            set_error( STATUS_INVALID_PARAMETER );
2287
            return;
2288 2289
        }

2290 2291 2292
        if (sock->accept_recv_req)
        {
            set_error( STATUS_INVALID_PARAMETER );
2293
            return;
2294 2295 2296 2297
        }

        if (sock->connect_req)
        {
2298
            set_error( STATUS_INVALID_PARAMETER );
2299
            return;
2300 2301
        }

2302
        switch (sock->state)
2303
        {
2304 2305
            case SOCK_LISTENING:
                set_error( STATUS_INVALID_PARAMETER );
2306
                return;
2307 2308 2309 2310 2311

            case SOCK_CONNECTING:
                /* FIXME: STATUS_ADDRESS_ALREADY_ASSOCIATED probably isn't right,
                 * but there's no status code that maps to WSAEALREADY... */
                set_error( params->synchronous ? STATUS_ADDRESS_ALREADY_ASSOCIATED : STATUS_INVALID_PARAMETER );
2312
                return;
2313 2314 2315

            case SOCK_CONNECTED:
                set_error( STATUS_CONNECTION_ACTIVE );
2316
                return;
2317 2318 2319 2320

            case SOCK_UNCONNECTED:
            case SOCK_CONNECTIONLESS:
                break;
2321 2322
        }

2323 2324 2325 2326
        unix_len = sockaddr_to_unix( addr, params->addr_len, &unix_addr );
        if (!unix_len)
        {
            set_error( STATUS_INVALID_ADDRESS );
2327
            return;
2328 2329 2330 2331 2332
        }
        if (unix_addr.addr.sa_family == AF_INET && !memcmp( &unix_addr.in.sin_addr, magic_loopback_addr, 4 ))
            unix_addr.in.sin_addr.s_addr = htonl( INADDR_LOOPBACK );

        ret = connect( unix_fd, &unix_addr.addr, unix_len );
2333 2334 2335
        if (ret < 0 && errno != EINPROGRESS)
        {
            set_error( sock_get_ntstatus( errno ) );
2336
            return;
2337 2338
        }

2339 2340 2341
        /* a connected or connecting socket can no longer be accepted into */
        allow_fd_caching( sock->fd );

2342
        unix_len = sizeof(unix_addr);
2343
        if (!getsockname( unix_fd, &unix_addr.addr, &unix_len ))
2344 2345 2346
            sock->addr_len = sockaddr_from_unix( &unix_addr, &sock->addr.addr, sizeof(sock->addr) );
        sock->bound = 1;

2347 2348
        if (!ret)
        {
2349
            sock->state = SOCK_CONNECTED;
2350

2351
            if (!send_len) return;
2352 2353
        }

2354
        sock->state = SOCK_CONNECTING;
2355

2356
        if (params->synchronous && sock->nonblocking)
2357 2358 2359
        {
            sock_reselect( sock );
            set_error( STATUS_DEVICE_NOT_READY );
2360
            return;
2361 2362
        }

2363 2364 2365
        if (!(req = mem_alloc( sizeof(*req) )))
            return;

2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377
        req->async = (struct async *)grab_object( async );
        req->iosb = async_get_iosb( async );
        req->sock = (struct sock *)grab_object( sock );
        req->addr_len = params->addr_len;
        req->send_len = send_len;
        req->send_cursor = 0;

        async_set_completion_callback( async, free_connect_req, req );
        sock->connect_req = req;
        queue_async( &sock->connect_q, async );
        sock_reselect( sock );
        set_error( STATUS_PENDING );
2378
        return;
2379 2380
    }

2381 2382 2383 2384 2385 2386 2387
    case IOCTL_AFD_WINE_SHUTDOWN:
    {
        unsigned int how;

        if (get_req_data_size() < sizeof(int))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2388
            return;
2389 2390 2391 2392 2393 2394
        }
        how = *(int *)get_req_data();

        if (how > SD_BOTH)
        {
            set_error( STATUS_INVALID_PARAMETER );
2395
            return;
2396 2397
        }

2398
        if (sock->state != SOCK_CONNECTED && sock->state != SOCK_CONNECTIONLESS)
2399 2400
        {
            set_error( STATUS_INVALID_CONNECTION );
2401
            return;
2402 2403 2404 2405
        }

        if (how != SD_SEND)
        {
2406
            sock->rd_shutdown = 1;
2407 2408 2409
        }
        if (how != SD_RECEIVE)
        {
2410
            sock->wr_shutdown = 1;
2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422
            if (list_empty( &sock->write_q.queue ))
                shutdown( unix_fd, SHUT_WR );
            else
                sock->wr_shutdown_pending = 1;
        }

        if (how == SD_BOTH)
        {
            if (sock->event) release_object( sock->event );
            sock->event = NULL;
            sock->window = 0;
            sock->mask = 0;
2423
            sock->nonblocking = 1;
2424 2425 2426
        }

        sock_reselect( sock );
2427
        return;
2428 2429
    }

2430
    case IOCTL_AFD_WINE_ADDRESS_LIST_CHANGE:
2431 2432 2433 2434 2435 2436
    {
        int force_async;

        if (get_req_data_size() < sizeof(int))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2437
            return;
2438 2439 2440
        }
        force_async = *(int *)get_req_data();

2441
        if (sock->nonblocking && !force_async)
2442
        {
2443
            set_error( STATUS_DEVICE_NOT_READY );
2444
            return;
2445
        }
2446
        if (!sock_get_ifchange( sock )) return;
2447 2448
        queue_async( &sock->ifchange_q, async );
        set_error( STATUS_PENDING );
2449
        return;
2450
    }
2451

2452 2453 2454 2455
    case IOCTL_AFD_WINE_FIONBIO:
        if (get_req_data_size() < sizeof(int))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2456
            return;
2457 2458 2459
        }
        if (*(int *)get_req_data())
        {
2460
            sock->nonblocking = 1;
2461 2462 2463 2464 2465 2466
        }
        else
        {
            if (sock->mask)
            {
                set_error( STATUS_INVALID_PARAMETER );
2467
                return;
2468
            }
2469
            sock->nonblocking = 0;
2470
        }
2471
        return;
2472

2473 2474 2475 2476 2477 2478 2479 2480
    case IOCTL_AFD_GET_EVENTS:
    {
        struct afd_get_events_params params = {0};
        unsigned int i;

        if (get_reply_max_size() < sizeof(params))
        {
            set_error( STATUS_INVALID_PARAMETER );
2481
            return;
2482 2483 2484 2485 2486 2487 2488 2489 2490 2491
        }

        params.flags = sock->pending_events & sock->mask;
        for (i = 0; i < ARRAY_SIZE( params.status ); ++i)
            params.status[i] = sock_get_ntstatus( sock->errors[i] );

        sock->pending_events = 0;
        sock_reselect( sock );

        set_reply_data( &params, sizeof(params) );
2492
        return;
2493 2494
    }

2495 2496 2497 2498 2499 2500
    case IOCTL_AFD_EVENT_SELECT:
    {
        struct event *event = NULL;
        obj_handle_t event_handle;
        int mask;

2501
        set_async_pending( async );
2502

2503 2504 2505 2506
        if (is_machine_64bit( current->process->machine ))
        {
            const struct afd_event_select_params_64 *params = get_req_data();

2507
            if (get_req_data_size() < sizeof(*params))
2508 2509
            {
                set_error( STATUS_INVALID_PARAMETER );
2510
                return;
2511 2512 2513 2514 2515 2516 2517 2518 2519
            }

            event_handle = params->event;
            mask = params->mask;
        }
        else
        {
            const struct afd_event_select_params_32 *params = get_req_data();

2520
            if (get_req_data_size() < sizeof(*params))
2521 2522
            {
                set_error( STATUS_INVALID_PARAMETER );
2523
                return;
2524 2525 2526 2527 2528 2529 2530 2531 2532 2533
            }

            event_handle = params->event;
            mask = params->mask;
        }

        if ((event_handle || mask) &&
            !(event = get_event_obj( current->process, event_handle, EVENT_MODIFY_STATE )))
        {
            set_error( STATUS_INVALID_PARAMETER );
2534
            return;
2535 2536 2537 2538 2539 2540 2541 2542 2543
        }

        if (sock->event) release_object( sock->event );
        sock->event = event;
        sock->mask = mask;
        sock->window = 0;
        sock->message = 0;
        sock->wparam = 0;
        sock->nonblocking = 1;
2544 2545

        sock_reselect( sock );
2546 2547 2548 2549 2550
        /* Explicitly wake the socket up if the mask causes it to become
         * signaled. Note that reselecting isn't enough, since we might already
         * have had events recorded in sock->reported_events and we don't want
         * to select for them again. */
        sock_wake_up( sock );
2551

2552
        return;
2553 2554 2555 2556 2557 2558 2559 2560 2561
    }

    case IOCTL_AFD_WINE_MESSAGE_SELECT:
    {
        const struct afd_message_select_params *params = get_req_data();

        if (get_req_data_size() < sizeof(params))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2562
            return;
2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577
        }

        if (sock->event) release_object( sock->event );

        if (params->window)
        {
            sock->pending_events = 0;
            sock->reported_events = 0;
        }
        sock->event = NULL;
        sock->mask = params->mask;
        sock->window = params->window;
        sock->message = params->message;
        sock->wparam = params->handle;
        sock->nonblocking = 1;
2578 2579 2580

        sock_reselect( sock );

2581
        return;
2582 2583
    }

2584 2585 2586
    case IOCTL_AFD_BIND:
    {
        const struct afd_bind_params *params = get_req_data();
2587
        union unix_sockaddr unix_addr, bind_addr;
2588 2589 2590 2591 2592 2593 2594 2595
        data_size_t in_size;
        socklen_t unix_len;

        /* the ioctl is METHOD_NEITHER, so ntdll gives us the output buffer as
         * input */
        if (get_req_data_size() < get_reply_max_size())
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2596
            return;
2597 2598 2599
        }
        in_size = get_req_data_size() - get_reply_max_size();
        if (in_size < offsetof(struct afd_bind_params, addr.sa_data)
2600
                || get_reply_max_size() < in_size - sizeof(int))
2601 2602
        {
            set_error( STATUS_INVALID_PARAMETER );
2603
            return;
2604 2605
        }

2606 2607 2608
        if (sock->bound)
        {
            set_error( STATUS_ADDRESS_ALREADY_ASSOCIATED );
2609
            return;
2610 2611
        }

2612 2613 2614 2615
        unix_len = sockaddr_to_unix( &params->addr, in_size - sizeof(int), &unix_addr );
        if (!unix_len)
        {
            set_error( STATUS_INVALID_ADDRESS );
2616
            return;
2617
        }
2618
        bind_addr = unix_addr;
2619

2620
        if (unix_addr.addr.sa_family == AF_INET)
2621 2622 2623
        {
            if (!memcmp( &unix_addr.in.sin_addr, magic_loopback_addr, 4 )
                    || bind_to_interface( sock, &unix_addr.in ))
2624
                bind_addr.in.sin_addr.s_addr = htonl( INADDR_ANY );
2625
        }
2626 2627 2628 2629 2630 2631 2632 2633 2634 2635
        else if (unix_addr.addr.sa_family == AF_INET6)
        {
#ifdef HAVE_STRUCT_SOCKADDR_IN6_SIN6_SCOPE_ID
            /* Windows allows specifying zero to use the default scope. Linux
             * interprets it as an interface index and requires that it be
             * nonzero. */
            if (!unix_addr.in6.sin6_scope_id)
                bind_addr.in6.sin6_scope_id = get_ipv6_interface_index( &unix_addr.in6.sin6_addr );
#endif
        }
2636

2637
        set_async_pending( async );
2638

2639
        if (bind( unix_fd, &bind_addr.addr, unix_len ) < 0)
2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650
        {
            if (errno == EADDRINUSE)
            {
                int reuse;
                socklen_t len = sizeof(reuse);

                if (!getsockopt( unix_fd, SOL_SOCKET, SO_REUSEADDR, (char *)&reuse, &len ) && reuse)
                    errno = EACCES;
            }

            set_error( sock_get_ntstatus( errno ) );
2651
            return;
2652
        }
2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665

        sock->bound = 1;

        unix_len = sizeof(bind_addr);
        if (!getsockname( unix_fd, &bind_addr.addr, &unix_len ))
        {
            /* store the interface or magic loopback address instead of the
             * actual unix address */
            if (bind_addr.addr.sa_family == AF_INET)
                bind_addr.in.sin_addr = unix_addr.in.sin_addr;
            sock->addr_len = sockaddr_from_unix( &bind_addr, &sock->addr.addr, sizeof(sock->addr) );
        }

2666 2667
        if (get_reply_max_size() >= sock->addr_len)
            set_reply_data( &sock->addr, sock->addr_len );
2668
        return;
2669 2670
    }

2671 2672 2673 2674
    case IOCTL_AFD_GETSOCKNAME:
        if (!sock->bound)
        {
            set_error( STATUS_INVALID_PARAMETER );
2675
            return;
2676 2677 2678 2679 2680
        }

        if (get_reply_max_size() < sock->addr_len)
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2681
            return;
2682 2683 2684
        }

        set_reply_data( &sock->addr, sock->addr_len );
2685
        return;
2686

2687 2688 2689 2690 2691 2692 2693 2694
    case IOCTL_AFD_WINE_DEFER:
    {
        const obj_handle_t *handle = get_req_data();
        struct sock *acceptsock;

        if (get_req_data_size() < sizeof(*handle))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2695
            return;
2696 2697 2698
        }

        acceptsock = (struct sock *)get_handle_obj( current->process, *handle, 0, &sock_ops );
2699
        if (!acceptsock) return;
2700 2701

        sock->deferred = acceptsock;
2702
        return;
2703 2704
    }

2705 2706 2707 2708 2709 2710 2711
    case IOCTL_AFD_WINE_GET_INFO:
    {
        struct afd_get_info_params params;

        if (get_reply_max_size() < sizeof(params))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2712
            return;
2713 2714 2715 2716 2717 2718
        }

        params.family = sock->family;
        params.type = sock->type;
        params.protocol = sock->proto;
        set_reply_data( &params, sizeof(params) );
2719
        return;
2720 2721
    }

2722 2723 2724 2725 2726 2727 2728
    case IOCTL_AFD_WINE_GET_SO_ACCEPTCONN:
    {
        int listening = (sock->state == SOCK_LISTENING);

        if (get_reply_max_size() < sizeof(listening))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2729
            return;
2730 2731 2732
        }

        set_reply_data( &listening, sizeof(listening) );
2733
        return;
2734 2735
    }

2736 2737 2738 2739 2740 2741 2742 2743 2744
    case IOCTL_AFD_WINE_GET_SO_ERROR:
    {
        int error;
        socklen_t len = sizeof(error);
        unsigned int i;

        if (get_reply_max_size() < sizeof(error))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2745
            return;
2746 2747 2748 2749 2750
        }

        if (getsockopt( unix_fd, SOL_SOCKET, SO_ERROR, (char *)&error, &len ) < 0)
        {
            set_error( sock_get_ntstatus( errno ) );
2751
            return;
2752 2753 2754 2755 2756 2757 2758 2759
        }

        if (!error)
        {
            for (i = 0; i < ARRAY_SIZE( sock->errors ); ++i)
            {
                if (sock->errors[i])
                {
2760
                    error = sock_get_error( sock->errors[i] );
2761 2762 2763 2764 2765 2766
                    break;
                }
            }
        }

        set_reply_data( &error, sizeof(error) );
2767
        return;
2768 2769
    }

2770 2771 2772 2773 2774 2775 2776
    case IOCTL_AFD_WINE_GET_SO_RCVBUF:
    {
        int rcvbuf = sock->rcvbuf;

        if (get_reply_max_size() < sizeof(rcvbuf))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2777
            return;
2778 2779 2780
        }

        set_reply_data( &rcvbuf, sizeof(rcvbuf) );
2781
        return;
2782 2783
    }

2784 2785 2786 2787 2788 2789 2790
    case IOCTL_AFD_WINE_SET_SO_RCVBUF:
    {
        DWORD rcvbuf;

        if (get_req_data_size() < sizeof(rcvbuf))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2791
            return;
2792 2793 2794
        }
        rcvbuf = *(DWORD *)get_req_data();

2795 2796 2797
        if (!setsockopt( unix_fd, SOL_SOCKET, SO_RCVBUF, (char *)&rcvbuf, sizeof(rcvbuf) ))
            sock->rcvbuf = rcvbuf;
        else
2798
            set_error( sock_get_ntstatus( errno ) );
2799
        return;
2800 2801
    }

2802 2803 2804 2805 2806 2807 2808
    case IOCTL_AFD_WINE_GET_SO_RCVTIMEO:
    {
        DWORD rcvtimeo = sock->rcvtimeo;

        if (get_reply_max_size() < sizeof(rcvtimeo))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2809
            return;
2810 2811 2812
        }

        set_reply_data( &rcvtimeo, sizeof(rcvtimeo) );
2813
        return;
2814 2815 2816 2817 2818 2819 2820 2821 2822
    }

    case IOCTL_AFD_WINE_SET_SO_RCVTIMEO:
    {
        DWORD rcvtimeo;

        if (get_req_data_size() < sizeof(rcvtimeo))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2823
            return;
2824 2825 2826 2827
        }
        rcvtimeo = *(DWORD *)get_req_data();

        sock->rcvtimeo = rcvtimeo;
2828
        return;
2829 2830
    }

2831 2832 2833 2834 2835 2836 2837
    case IOCTL_AFD_WINE_GET_SO_SNDBUF:
    {
        int sndbuf = sock->sndbuf;

        if (get_reply_max_size() < sizeof(sndbuf))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2838
            return;
2839 2840 2841
        }

        set_reply_data( &sndbuf, sizeof(sndbuf) );
2842
        return;
2843 2844
    }

2845 2846 2847 2848 2849 2850 2851
    case IOCTL_AFD_WINE_SET_SO_SNDBUF:
    {
        DWORD sndbuf;

        if (get_req_data_size() < sizeof(sndbuf))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2852
            return;
2853 2854 2855 2856 2857 2858 2859
        }
        sndbuf = *(DWORD *)get_req_data();

#ifdef __APPLE__
        if (!sndbuf)
        {
            /* setsockopt fails if a zero value is passed */
2860
            sock->sndbuf = sndbuf;
2861
            return;
2862 2863 2864
        }
#endif

2865 2866 2867
        if (!setsockopt( unix_fd, SOL_SOCKET, SO_SNDBUF, (char *)&sndbuf, sizeof(sndbuf) ))
            sock->sndbuf = sndbuf;
        else
2868
            set_error( sock_get_ntstatus( errno ) );
2869
        return;
2870 2871
    }

2872 2873 2874 2875 2876 2877 2878
    case IOCTL_AFD_WINE_GET_SO_SNDTIMEO:
    {
        DWORD sndtimeo = sock->sndtimeo;

        if (get_reply_max_size() < sizeof(sndtimeo))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2879
            return;
2880 2881 2882
        }

        set_reply_data( &sndtimeo, sizeof(sndtimeo) );
2883
        return;
2884 2885 2886 2887 2888 2889 2890 2891 2892
    }

    case IOCTL_AFD_WINE_SET_SO_SNDTIMEO:
    {
        DWORD sndtimeo;

        if (get_req_data_size() < sizeof(sndtimeo))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2893
            return;
2894 2895 2896 2897
        }
        sndtimeo = *(DWORD *)get_req_data();

        sock->sndtimeo = sndtimeo;
2898
        return;
2899 2900
    }

2901 2902 2903 2904 2905 2906 2907
    case IOCTL_AFD_WINE_GET_SO_CONNECT_TIME:
    {
        DWORD time = ~0u;

        if (get_reply_max_size() < sizeof(time))
        {
            set_error( STATUS_BUFFER_TOO_SMALL );
2908
            return;
2909 2910 2911 2912 2913 2914
        }

        if (sock->state == SOCK_CONNECTED)
            time = (current_time - sock->connect_time) / 10000000;

        set_reply_data( &time, sizeof(time) );
2915
        return;
2916 2917
    }

2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966
    case IOCTL_AFD_POLL:
    {
        if (get_reply_max_size() < get_req_data_size())
        {
            set_error( STATUS_INVALID_PARAMETER );
            return;
        }

        if (is_machine_64bit( current->process->machine ))
        {
            const struct afd_poll_params_64 *params = get_req_data();

            if (get_req_data_size() < sizeof(struct afd_poll_params_64) ||
                get_req_data_size() < offsetof( struct afd_poll_params_64, sockets[params->count] ))
            {
                set_error( STATUS_INVALID_PARAMETER );
                return;
            }

            poll_socket( sock, async, params->exclusive, params->timeout, params->count, params->sockets );
        }
        else
        {
            const struct afd_poll_params_32 *params = get_req_data();
            struct afd_poll_socket_64 *sockets;
            unsigned int i;

            if (get_req_data_size() < sizeof(struct afd_poll_params_32) ||
                get_req_data_size() < offsetof( struct afd_poll_params_32, sockets[params->count] ))
            {
                set_error( STATUS_INVALID_PARAMETER );
                return;
            }

            if (!(sockets = mem_alloc( params->count * sizeof(*sockets) ))) return;
            for (i = 0; i < params->count; ++i)
            {
                sockets[i].socket = params->sockets[i].socket;
                sockets[i].flags = params->sockets[i].flags;
                sockets[i].status = params->sockets[i].status;
            }

            poll_socket( sock, async, params->exclusive, params->timeout, params->count, sockets );
            free( sockets );
        }

        return;
    }

2967 2968
    default:
        set_error( STATUS_NOT_SUPPORTED );
2969
        return;
2970 2971 2972
    }
}

2973 2974 2975 2976 2977 2978 2979 2980 2981
static int poll_single_socket( struct sock *sock, int mask )
{
    struct pollfd pollfd;

    pollfd.fd = get_unix_fd( sock->fd );
    pollfd.events = poll_flags_from_afd( sock, mask );
    if (pollfd.events < 0 || poll( &pollfd, 1, 0 ) < 0)
        return 0;

2982 2983 2984
    if (sock->state == SOCK_CONNECTING && (pollfd.revents & (POLLERR | POLLHUP)))
        pollfd.revents &= ~POLLOUT;

2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998
    if ((mask & AFD_POLL_HUP) && (pollfd.revents & POLLIN) && sock->type == WS_SOCK_STREAM)
    {
        char dummy;

        if (!recv( get_unix_fd( sock->fd ), &dummy, 1, MSG_PEEK ))
        {
            pollfd.revents &= ~POLLIN;
            pollfd.revents |= POLLHUP;
        }
    }

    return get_poll_flags( sock, pollfd.revents ) & mask;
}

2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019
/* Make "req" the main poll of each socket it covers; an exclusive request
 * displaces (completes) a previous exclusive request on the same socket. */
static void handle_exclusive_poll(struct poll_req *req)
{
    unsigned int i;

    for (i = 0; i < req->count; ++i)
    {
        struct sock *sock = req->sockets[i].sock;
        struct poll_req *prev = sock->main_poll;

        if (req->exclusive && prev && prev->exclusive)
        {
            /* terminate the earlier exclusive poll in favour of this one */
            complete_async_poll( prev, STATUS_SUCCESS );
            prev = NULL;
        }

        if (!prev) sock->main_poll = req;
    }
}

/* Implement IOCTL_AFD_POLL: poll the given sockets once and, unless something
 * is already signaled (or the timeout is zero), leave a pending async that
 * completes when one of them becomes signaled or the timeout expires. */
static void poll_socket( struct sock *poll_sock, struct async *async, int exclusive, timeout_t timeout,
                         unsigned int count, const struct afd_poll_socket_64 *sockets )
{
    BOOL signaled = FALSE;
    struct poll_req *req;
    unsigned int i, j;

    if (!count)
    {
        set_error( STATUS_INVALID_PARAMETER );
        return;
    }

    if (!(req = mem_alloc( offsetof( struct poll_req, sockets[count] ) )))
        return;

    req->timeout = NULL;
    if (timeout && timeout != TIMEOUT_INFINITE &&
        !(req->timeout = add_timeout_user( timeout, async_poll_timeout, req )))
    {
        free( req );
        return;
    }
    req->orig_timeout = timeout;

    /* resolve every socket handle up front; on any failure release
     * everything acquired so far and bail out */
    for (i = 0; i < count; ++i)
    {
        req->sockets[i].sock = (struct sock *)get_handle_obj( current->process, sockets[i].socket, 0, &sock_ops );
        if (!req->sockets[i].sock)
        {
            for (j = 0; j < i; ++j) release_object( req->sockets[j].sock );
            if (req->timeout) remove_timeout_user( req->timeout );
            free( req );
            return;
        }
        req->sockets[i].handle = sockets[i].socket;
        req->sockets[i].mask = sockets[i].flags;
        req->sockets[i].flags = 0;
    }

    req->exclusive = exclusive;
    req->count = count;
    req->async = (struct async *)grab_object( async );
    req->iosb = async_get_iosb( async );

    /* an exclusive poll displaces any previous exclusive poll on these sockets */
    handle_exclusive_poll(req);

    list_add_tail( &poll_list, &req->entry );
    async_set_completion_callback( async, free_poll_req, req );
    queue_async( &poll_sock->poll_q, async );

    /* check whether any of the sockets is already signaled */
    for (i = 0; i < count; ++i)
    {
        struct sock *sock = req->sockets[i].sock;
        int mask = req->sockets[i].mask;
        int flags = poll_single_socket( sock, mask );

        if (flags)
        {
            signaled = TRUE;
            req->sockets[i].flags = flags;
            req->sockets[i].status = sock_get_ntstatus( sock_error( sock->fd ) );
        }

        /* FIXME: do other error conditions deserve a similar treatment? */
        if (sock->state != SOCK_CONNECTING && sock->errors[AFD_POLL_BIT_CONNECT_ERR] && (mask & AFD_POLL_CONNECT_ERR))
        {
            signaled = TRUE;
            req->sockets[i].flags |= AFD_POLL_CONNECT_ERR;
            req->sockets[i].status = sock_get_ntstatus( sock->errors[AFD_POLL_BIT_CONNECT_ERR] );
        }
    }

    /* a zero timeout means "poll once and complete immediately" */
    if (!timeout || signaled)
        complete_async_poll( req, STATUS_SUCCESS );

    for (i = 0; i < req->count; ++i)
        sock_reselect( req->sockets[i].sock );
    set_error( STATUS_PENDING );
}

3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121
#ifdef HAVE_LINUX_RTNETLINK_H

/* only keep one ifchange object around, all sockets waiting for wakeups will look to it */
static struct object *ifchange_object;

static void ifchange_dump( struct object *obj, int verbose );
static struct fd *ifchange_get_fd( struct object *obj );
static void ifchange_destroy( struct object *obj );

static int ifchange_get_poll_events( struct fd *fd );
static void ifchange_poll_event( struct fd *fd, int event );

/* singleton object that relays interface change notifications, read from a
 * netlink socket, to every registered socket */
struct ifchange
{
    struct object       obj;     /* object header */
    struct fd          *fd;      /* interface change file descriptor */
    struct list         sockets; /* list of sockets to send interface change notifications */
};

/* object operations for the ifchange notification object */
static const struct object_ops ifchange_ops =
{
    sizeof(struct ifchange), /* size */
    &no_type,                /* type */
    ifchange_dump,           /* dump */
    no_add_queue,            /* add_queue */
    NULL,                    /* remove_queue */
    NULL,                    /* signaled */
    no_satisfied,            /* satisfied */
    no_signal,               /* signal */
    ifchange_get_fd,         /* get_fd */
    default_map_access,      /* map_access */
    default_get_sd,          /* get_sd */
    default_set_sd,          /* set_sd */
    no_get_full_name,        /* get_full_name */
    no_lookup_name,          /* lookup_name */
    no_link_name,            /* link_name */
    NULL,                    /* unlink_name */
    no_open_file,            /* open_file */
    no_kernel_obj_list,      /* get_kernel_obj_list */
    no_close_handle,         /* close_handle */
    ifchange_destroy         /* destroy */
};

/* fd operations for the ifchange netlink fd; only poll wakeups are used */
static const struct fd_ops ifchange_fd_ops =
{
    ifchange_get_poll_events, /* get_poll_events */
    ifchange_poll_event,      /* poll_event */
    NULL,                     /* get_fd_type */
    no_fd_read,               /* read */
    no_fd_write,              /* write */
    no_fd_flush,              /* flush */
    no_fd_get_file_info,      /* get_file_info */
    no_fd_get_volume_info,    /* get_volume_info */
    no_fd_ioctl,              /* ioctl */
    NULL,                     /* cancel_async */
    NULL,                     /* queue_async */
    NULL                      /* reselect_async */
};

/* dump the ifchange object for debugging */
static void ifchange_dump( struct object *obj, int verbose )
{
    assert( obj->ops == &ifchange_ops );
    fputs( "Interface change\n", stderr );
}

/* return a new reference to the fd backing the ifchange object */
static struct fd *ifchange_get_fd( struct object *obj )
{
    struct ifchange *change = (struct ifchange *)obj;

    return (struct fd *)grab_object( change->fd );
}

/* release the ifchange object's resources and clear the singleton pointer */
static void ifchange_destroy( struct object *obj )
{
    struct ifchange *change = (struct ifchange *)obj;

    assert( obj->ops == &ifchange_ops );
    release_object( change->fd );

    /* clear the global so that a later request recreates the object */
    assert( obj == ifchange_object );
    ifchange_object = NULL;
}

/* the ifchange netlink socket is only ever polled for incoming messages */
static int ifchange_get_poll_events( struct fd *fd )
{
    return POLLIN;
}

/* wake up all the sockets waiting for a change notification event */
static void ifchange_wake_up( struct object *obj, unsigned int status )
{
    struct ifchange *ifchange = (struct ifchange *)obj;
    struct list *ptr, *next;
    assert( obj->ops == &ifchange_ops );
    assert( obj == ifchange_object );

    /* _SAFE iteration is required: sock_release_ifchange() unlinks the
     * current entry from the list while we walk it */
    LIST_FOR_EACH_SAFE( ptr, next, &ifchange->sockets )
    {
        struct sock *sock = LIST_ENTRY( ptr, struct sock, ifchange_entry );

        assert( sock->ifchange_obj );
        async_wake_up( &sock->ifchange_q, status ); /* issue ifchange notification for the socket */
        sock_release_ifchange( sock ); /* remove socket from list and decrement ifchange refcount */
    }
}

/* read pending netlink messages and notify waiters if an address changed */
static void ifchange_poll_event( struct fd *fd, int event )
{
    struct object *ifchange = get_fd_user( fd );
    unsigned int status = STATUS_PENDING;
    char buffer[PIPE_BUF];
    int r;

    r = recv( get_unix_fd(fd), buffer, sizeof(buffer), MSG_DONTWAIT );
    if (r < 0)
    {
        /* EAGAIN and EWOULDBLOCK may be distinct values; check both */
        if (errno == EWOULDBLOCK || (EWOULDBLOCK != EAGAIN && errno == EAGAIN))
            return;  /* retry when poll() says the socket is ready */
        status = sock_get_ntstatus( errno );
    }
    else if (r > 0)
    {
        struct nlmsghdr *nlh;

        /* walk the netlink messages; any address add/removal counts as an
         * interface change */
        for (nlh = (struct nlmsghdr *)buffer; NLMSG_OK(nlh, r); nlh = NLMSG_NEXT(nlh, r))
        {
            if (nlh->nlmsg_type == NLMSG_DONE)
                break;
            if (nlh->nlmsg_type == RTM_NEWADDR || nlh->nlmsg_type == RTM_DELADDR)
                status = STATUS_SUCCESS;
        }
    }
    else status = STATUS_CANCELLED;

    if (status != STATUS_PENDING) ifchange_wake_up( ifchange, status );
}

#endif

/* we only need one of these interface notification objects, all of the sockets dependent upon
 * it will wake up when a notification event occurs */
 static struct object *get_ifchange( void )
 {
#ifdef HAVE_LINUX_RTNETLINK_H
    struct ifchange *ifchange;
    struct sockaddr_nl addr;
    int unix_fd;

    if (ifchange_object)
    {
        /* increment the refcount for each socket that uses the ifchange object */
        return grab_object( ifchange_object );
    }

    /* create the socket we need for processing interface change notifications */
    unix_fd = socket( PF_NETLINK, SOCK_RAW, NETLINK_ROUTE );
    if (unix_fd == -1)
    {
3258
        set_error( sock_get_ntstatus( errno ));
3259 3260 3261 3262 3263 3264 3265 3266 3267 3268
        return NULL;
    }
    fcntl( unix_fd, F_SETFL, O_NONBLOCK ); /* make socket nonblocking */
    memset( &addr, 0, sizeof(addr) );
    addr.nl_family = AF_NETLINK;
    addr.nl_groups = RTMGRP_IPV4_IFADDR;
    /* bind the socket to the special netlink kernel interface */
    if (bind( unix_fd, (struct sockaddr *)&addr, sizeof(addr) ) == -1)
    {
        close( unix_fd );
3269
        set_error( sock_get_ntstatus( errno ));
3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290
        return NULL;
    }
    if (!(ifchange = alloc_object( &ifchange_ops )))
    {
        close( unix_fd );
        set_error( STATUS_NO_MEMORY );
        return NULL;
    }
    list_init( &ifchange->sockets );
    if (!(ifchange->fd = create_anonymous_fd( &ifchange_fd_ops, unix_fd, &ifchange->obj, 0 )))
    {
        release_object( ifchange );
        set_error( STATUS_NO_MEMORY );
        return NULL;
    }
    set_fd_events( ifchange->fd, POLLIN ); /* enable read wakeup on the file descriptor */

    /* the ifchange object is now successfully configured */
    ifchange_object = &ifchange->obj;
    return &ifchange->obj;
#else
3291 3292
    set_error( STATUS_NOT_SUPPORTED );
    return NULL;
3293
#endif
3294 3295
}

3296
/* add the socket to the interface change notification list */
static void ifchange_add_sock( struct object *obj, struct sock *sock )
{
#ifdef HAVE_LINUX_RTNETLINK_H
    struct ifchange *change = (struct ifchange *)obj;

    list_add_tail( &change->sockets, &sock->ifchange_entry );
#endif
}

/* create a new ifchange queue for a specific socket or, if one already exists, reuse the existing one */
3307
static struct object *sock_get_ifchange( struct sock *sock )
3308 3309 3310
{
    struct object *ifchange;

3311 3312
    if (sock->ifchange_obj) /* reuse existing ifchange_obj for this socket */
        return sock->ifchange_obj;
3313 3314 3315 3316 3317 3318 3319

    if (!(ifchange = get_ifchange()))
        return NULL;

    /* add the socket to the ifchange notification list */
    ifchange_add_sock( ifchange, sock );
    sock->ifchange_obj = ifchange;
3320
    return ifchange;
3321 3322 3323
}

/* destroy an existing ifchange queue for a specific socket */
3324
static void sock_release_ifchange( struct sock *sock )
3325
{
3326
    if (sock->ifchange_obj)
3327 3328 3329
    {
        list_remove( &sock->ifchange_entry );
        release_object( sock->ifchange_obj );
3330
        sock->ifchange_obj = NULL;
3331 3332 3333
    }
}

3334
static void socket_device_dump( struct object *obj, int verbose );
static struct object *socket_device_lookup_name( struct object *obj, struct unicode_str *name,
                                                 unsigned int attr, struct object *root );
static struct object *socket_device_open_file( struct object *obj, unsigned int access,
                                               unsigned int sharing, unsigned int options );

/* object operations for the socket device; opening it creates a new socket */
static const struct object_ops socket_device_ops =
{
    sizeof(struct object),      /* size */
    &device_type,               /* type */
    socket_device_dump,         /* dump */
    no_add_queue,               /* add_queue */
    NULL,                       /* remove_queue */
    NULL,                       /* signaled */
    no_satisfied,               /* satisfied */
    no_signal,                  /* signal */
    no_get_fd,                  /* get_fd */
    default_map_access,         /* map_access */
    default_get_sd,             /* get_sd */
    default_set_sd,             /* set_sd */
    default_get_full_name,      /* get_full_name */
    socket_device_lookup_name,  /* lookup_name */
    directory_link_name,        /* link_name */
    default_unlink_name,        /* unlink_name */
    socket_device_open_file,    /* open_file */
    no_kernel_obj_list,         /* get_kernel_obj_list */
    no_close_handle,            /* close_handle */
    no_destroy                  /* destroy */
};

/* dump the socket device object for debugging */
static void socket_device_dump( struct object *obj, int verbose )
{
    fprintf( stderr, "Socket device\n" );
}

3369 3370
static struct object *socket_device_lookup_name( struct object *obj, struct unicode_str *name,
                                                 unsigned int attr, struct object *root )
3371
{
3372
    if (name) name->len = 0;
3373 3374 3375 3376 3377 3378
    return NULL;
}

static struct object *socket_device_open_file( struct object *obj, unsigned int access,
                                               unsigned int sharing, unsigned int options )
{
3379 3380 3381 3382 3383 3384 3385 3386 3387
    struct sock *sock;

    if (!(sock = create_socket())) return NULL;
    if (!(sock->fd = alloc_pseudo_fd( &sock_fd_ops, &sock->obj, options )))
    {
        release_object( sock );
        return NULL;
    }
    return &sock->obj;
3388 3389
}

3390 3391
/* create a named socket device object under the given root directory */
struct object *create_socket_device( struct object *root, const struct unicode_str *name,
                                     unsigned int attr, const struct security_descriptor *sd )
{
    return create_named_object( root, &socket_device_ops, name, attr, sd );
}

3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407
/* initiate a receive on a socket: decide whether the client's attempt
 * completed, failed, or must become a pending async */
DECL_HANDLER(recv_socket)
{
    struct sock *sock = (struct sock *)get_handle_obj( current->process, req->async.handle, 0, &sock_ops );
    unsigned int status = req->status;
    timeout_t timeout = 0;
    struct async *async;
    struct fd *fd;

    if (!sock) return;
    fd = sock->fd;

    /* recv() returned EWOULDBLOCK, i.e. no data available yet */
    if (status == STATUS_DEVICE_NOT_READY && !sock->nonblocking)
    {
        /* Set a timeout on the async if necessary.
         *
         * We want to do this *only* if the client gave us STATUS_DEVICE_NOT_READY.
         * If the client gave us STATUS_PENDING, it expects the async to always
         * block (it was triggered by WSARecv*() with a valid OVERLAPPED
         * structure) and for the timeout not to be respected. */
        if (is_fd_overlapped( fd ))
            timeout = (timeout_t)sock->rcvtimeo * -10000; /* ms to 100-ns units; negative is the NT relative-timeout convention */

        status = STATUS_PENDING;
    }

    /* a receive on a socket shut down for reading fails immediately */
    if ((status == STATUS_PENDING || status == STATUS_DEVICE_NOT_READY) && sock->rd_shutdown)
        status = STATUS_PIPE_DISCONNECTED;

    /* the client consumed (or attempted to consume) this event; clear it */
    sock->pending_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);
    sock->reported_events &= ~(req->oob ? AFD_POLL_OOB : AFD_POLL_READ);

    if ((async = create_request_async( fd, get_fd_comp_flags( fd ), &req->async )))
    {
        if (status == STATUS_SUCCESS)
        {
            /* the client already received req->total bytes; record it in the iosb */
            struct iosb *iosb = async_get_iosb( async );
            iosb->result = req->total;
            release_object( iosb );
        }
        set_error( status );

        if (timeout)
            async_set_timeout( async, timeout, STATUS_IO_TIMEOUT );

        if (status == STATUS_PENDING)
            queue_async( &sock->read_q, async );

        /* always reselect; we changed reported_events above */
        sock_reselect( sock );

        reply->wait = async_handoff( async, NULL, 0 );
        reply->options = get_fd_options( fd );
        release_object( async );
    }
    release_object( sock );
}
3453

3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464
/* initiate a send on a socket: implicitly bind datagram sockets, then decide
 * whether the client's attempt completed, failed, or must become a pending
 * async */
DECL_HANDLER(send_socket)
{
    struct sock *sock = (struct sock *)get_handle_obj( current->process, req->async.handle, 0, &sock_ops );
    unsigned int status = req->status;
    timeout_t timeout = 0;
    struct async *async;
    struct fd *fd;

    if (!sock) return;
    fd = sock->fd;

    if (sock->type == WS_SOCK_DGRAM)
    {
        /* sendto() and sendmsg() implicitly binds a socket */
        union unix_sockaddr unix_addr;
        socklen_t unix_len = sizeof(unix_addr);

        if (!sock->bound && !getsockname( get_unix_fd( fd ), &unix_addr.addr, &unix_len ))
            sock->addr_len = sockaddr_from_unix( &unix_addr, &sock->addr.addr, sizeof(sock->addr) );
        sock->bound = 1;
    }

    if (status != STATUS_SUCCESS)
    {
        /* send() calls only clear and reselect events if unsuccessful. */
        sock->pending_events &= ~AFD_POLL_WRITE;
        sock->reported_events &= ~AFD_POLL_WRITE;
    }

    /* If we had a short write and the socket is nonblocking (and the client is
     * not trying to force the operation to be asynchronous), return success.
     * Windows actually refuses to send any data in this case, and returns
     * EWOULDBLOCK, but we have no way of doing that. */
    if (status == STATUS_DEVICE_NOT_READY && req->total && sock->nonblocking)
        status = STATUS_SUCCESS;

    /* send() returned EWOULDBLOCK or a short write, i.e. cannot send all data yet */
    if (status == STATUS_DEVICE_NOT_READY && !sock->nonblocking)
    {
        /* Set a timeout on the async if necessary.
         *
         * We want to do this *only* if the client gave us STATUS_DEVICE_NOT_READY.
         * If the client gave us STATUS_PENDING, it expects the async to always
         * block (it was triggered by WSASend*() with a valid OVERLAPPED
         * structure) and for the timeout not to be respected. */
        if (is_fd_overlapped( fd ))
            timeout = (timeout_t)sock->sndtimeo * -10000; /* ms to 100-ns units; negative is the NT relative-timeout convention */

        status = STATUS_PENDING;
    }

    /* a send on a socket shut down for writing fails immediately */
    if ((status == STATUS_PENDING || status == STATUS_DEVICE_NOT_READY) && sock->wr_shutdown)
        status = STATUS_PIPE_DISCONNECTED;

    if ((async = create_request_async( fd, get_fd_comp_flags( fd ), &req->async )))
    {
        if (status == STATUS_SUCCESS)
        {
            /* the client already sent req->total bytes; record it in the iosb */
            struct iosb *iosb = async_get_iosb( async );
            iosb->result = req->total;
            release_object( iosb );
        }
        set_error( status );

        if (timeout)
            async_set_timeout( async, timeout, STATUS_IO_TIMEOUT );

        if (status == STATUS_PENDING)
            queue_async( &sock->write_q, async );

        /* always reselect; we changed reported_events above */
        sock_reselect( sock );

        reply->wait = async_handoff( async, NULL, 0 );
        reply->options = get_fd_options( fd );
        release_object( async );
    }
    release_object( sock );
}