--- hurdselect_orig.c 2012-10-21 22:55:26.000000000 +0200 +++ hurdselect_step1.c 2012-12-12 19:29:26.000000000 +0100 @@ -37,6 +37,13 @@ each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not NULL, time out after waiting the interval specified therein. Returns the number of ready descriptors, or -1 for errors. */ + +enum { + DELAY = -1, + SELECT = 0, + POLL = 1 +} ispoll; + int _hurd_select (int nfds, struct pollfd *pollfds, @@ -68,6 +75,52 @@ _hurd_select (int nfds, assert (sizeof (union typeword) == sizeof (mach_msg_type_t)); assert (sizeof (uint32_t) == sizeof (mach_msg_type_t)); + + union + { + mach_msg_header_t head; +#ifdef MACH_MSG_TRAILER_MINIMUM_SIZE + struct + { + mach_msg_header_t head; + NDR_record_t ndr; + error_t err; + } error; + struct + { + mach_msg_header_t head; + NDR_record_t ndr; + error_t err; + int result; + mach_msg_trailer_t trailer; + } success; +#else + struct + { + mach_msg_header_t head; + union typeword err_type; + error_t err; + } error; + struct + { + mach_msg_header_t head; + union typeword err_type; + error_t err; + union typeword result_type; + int result; + } success; +#endif + } msg; + mach_msg_option_t options = (timeout == NULL ? 0 : MACH_RCV_TIMEOUT); + error_t msgerr; +#define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ +#ifdef MACH_MSG_TYPE_BIT + const union typeword inttype = + { type: + { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 } + }; +#endif + if (nfds < 0 || nfds > FD_SETSIZE) { errno = EINVAL; @@ -84,15 +137,114 @@ _hurd_select (int nfds, to = (timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000); - if (strcmp(program_invocation_short_name, "vi") && strcmp(program_invocation_short_name, "vim") && strcmp(program_invocation_short_name, "vimdiff") && !to) - to = 1; } + /* XXX: A timeout of 0 returns immediately, even if no file + descriptors are ready. This is correct according to POSIX.1-2001. + As many programs rely on file descriptors being ready for a + timeout of zero use 1 msec as the minimum delay */ + /* FIXME: replace with something better */ +#if 1 + if (to == 0) + to = 1; +#endif + /* FIXME: sigmask is only used for ppoll and pselect: not yet implemented */ if (sigmask && __sigprocmask (SIG_SETMASK, sigmask, &oset)) return -1; - if (pollfds) + if (nfds == 0) + ispoll = DELAY; + else if (pollfds) + ispoll = POLL; + else + ispoll = SELECT; + + if (nfds > _hurd_dtablesize) + nfds = _hurd_dtablesize; + + switch (ispoll) { + case DELAY: + /* We are just a pure timeout. */ + portset = __mach_reply_port (); + + err = 0; + + /* Now wait for reply messages. */ + if (!err) + { + /* Now wait for io_select_reply messages on PORT, + timing out as appropriate. */ + while ((msgerr = __mach_msg (&msg.head, + MACH_RCV_MSG | options, + 0, sizeof msg, portset, to, + MACH_PORT_NULL)) == MACH_MSG_SUCCESS) + { + /* We got a message. Decode it. */ + if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && + msg.head.msgh_size >= sizeof msg.error && + !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && +#ifdef MACH_MSG_TYPE_BIT + msg.error.err_type.word == inttype.word +#endif + ) + { + /* This is a properly formatted message so far. + See if it is a success or a failure. */ + if (msg.error.err == EINTR && + msg.head.msgh_size == sizeof msg.error) + { + /* EINTR response; poll for further responses + and then return quickly. */ + err = EINTR; + goto exit; + } + if (msg.error.err || + msg.head.msgh_size != sizeof msg.success || +#ifdef MACH_MSG_TYPE_BIT + msg.success.result_type.word != inttype.word || +#endif + (msg.success.result & SELECT_ALL) == 0) + { + /* Error or bogus reply. Simulate readiness. */ + __mach_msg_destroy (&msg.head); + msg.success.result = SELECT_ALL; + } + } /* end ) { */ + + if (msg.head.msgh_remote_port != MACH_PORT_NULL) + __mach_port_deallocate (__mach_task_self (), + msg.head.msgh_remote_port); + } /* while */ + + exit: + if (err == MACH_RCV_TIMED_OUT) + /* This is the normal value for ERR. We might have timed + out and read no messages. Otherwise, after receiving + the first message, we poll for more messages. We + receive with a timeout of 0 to effect a poll, so ERR is + MACH_RCV_TIMED_OUT when the poll finds no message + waiting. */ + err = 0; + + } /* if(!err) */ + + /* Destroy PORTSET, but only if it's not actually the reply + port for a single descriptor (in which case it's + destroyed in the previous loop; not doing it here is just + a bit more efficient). */ + __mach_port_destroy (__mach_task_self (), portset); + + if (err) + { + errno = err; + return -1; + } + else + return 0; + break; + + case POLL: /* Collect interesting descriptors from the user's `pollfd' array. We do a first pass that reads the user's array before taking any locks. The second pass then only touches our own stack, @@ -132,6 +284,7 @@ _hurd_select (int nfds, } /* If one descriptor is bogus, we fail completely. */ + /* This is a bug in the current implementation, see POSIX 2001 */ while (i-- > 0) if (d[i].type != 0) _hurd_port_free (&d[i].cell->port, @@ -142,21 +295,212 @@ _hurd_select (int nfds, __mutex_unlock (&_hurd_dtable_lock); HURD_CRITICAL_END; + /* This is a bug in the current implementation, see POSIX 2001 */ if (i < nfds) { + /* FIXME: Only for ppoll */ if (sigmask) __sigprocmask (SIG_SETMASK, &oset, NULL); errno = EBADF; return -1; } + firstfd = 0; lastfd = i - 1; - firstfd = i == 0 ? lastfd : 0; - } - else - { + err = 0; + got = 0; + + /* Send them all io_select request messages. */ + portset = MACH_PORT_NULL; + + for (i = firstfd; i <= lastfd; ++i) + if (d[i].type) + { + int type = d[i].type; + d[i].reply_port = __mach_reply_port (); + err = __io_select (d[i].io_port, d[i].reply_port, + /* Poll only if there's a single descriptor. */ + (firstfd == lastfd) ? to : 0, + &type); + switch (err) + { + case MACH_RCV_TIMED_OUT: + /* No immediate response. This is normal. */ + err = 0; + if (firstfd == lastfd) + /* When there's a single descriptor, we don't need a + portset, so just pretend we have one, but really + use the single reply port. */ + portset = d[i].reply_port; + else if (got == 0) + /* We've got multiple reply ports, so we need a port set to + multiplex them. */ + { + /* We will wait again for a reply later. */ + if (portset == MACH_PORT_NULL) + /* Create the portset to receive all the replies on. */ + err = __mach_port_allocate (__mach_task_self (), + MACH_PORT_RIGHT_PORT_SET, + &portset); + if (! err) + /* Put this reply port in the port set. */ + __mach_port_move_member (__mach_task_self (), + d[i].reply_port, portset); + } + break; + + default: + /* No other error should happen. Callers of select + don't expect to see errors, so we simulate + readiness of the erring object and the next call + hopefully will get the error again. */ + type = SELECT_ALL; + /* FALLTHROUGH */ + + case 0: + /* We got an answer. */ + if ((type & SELECT_ALL) == 0) + /* Bogus answer; treat like an error, as a fake positive. */ + type = SELECT_ALL; + + /* This port is already ready already. */ + d[i].type &= type; + d[i].type |= SELECT_RETURNED; + ++got; + break; + } + _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); + } + + /* Now wait for reply messages. */ + if (!err && got == 0 && (firstfd != lastfd)) + { + /* Now wait for io_select_reply messages on PORT, + timing out as appropriate. */ + + while ((msgerr = __mach_msg (&msg.head, + MACH_RCV_MSG | options, + 0, sizeof msg, portset, to, + MACH_PORT_NULL)) == MACH_MSG_SUCCESS) + { + /* We got a message. Decode it. */ + if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && + msg.head.msgh_size >= sizeof msg.error && + !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && +#ifdef MACH_MSG_TYPE_BIT + msg.error.err_type.word == inttype.word +#endif + ) + { + /* This is a properly formatted message so far. + See if it is a success or a failure. */ + if (msg.error.err == EINTR && + msg.head.msgh_size == sizeof msg.error) + { + /* EINTR response; poll for further responses + and then return quickly. */ + err = EINTR; + goto poll; + } + if (msg.error.err || + msg.head.msgh_size != sizeof msg.success || +#ifdef MACH_MSG_TYPE_BIT + msg.success.result_type.word != inttype.word || +#endif + (msg.success.result & SELECT_ALL) == 0) + { + /* Error or bogus reply. Simulate readiness. */ + __mach_msg_destroy (&msg.head); + msg.success.result = SELECT_ALL; + } + + /* Look up the respondent's reply port and record its + readiness. */ + int had = got; + for (i = firstfd; i <= lastfd; ++i) + if (d[i].type + && d[i].reply_port == msg.head.msgh_local_port) + { + d[i].type &= msg.success.result; + d[i].type |= SELECT_RETURNED; + ++got; + } + assert (got > had); + } + + if (msg.head.msgh_remote_port != MACH_PORT_NULL) + __mach_port_deallocate (__mach_task_self (), + msg.head.msgh_remote_port); + + if (got) + poll: + { + /* Poll for another message. */ + to = 0; + options |= MACH_RCV_TIMEOUT; + } + } + + if (err == MACH_RCV_TIMED_OUT) + /* This is the normal value for ERR. We might have timed out and + read no messages. Otherwise, after receiving the first message, + we poll for more messages. We receive with a timeout of 0 to + effect a poll, so ERR is MACH_RCV_TIMED_OUT when the poll finds no + message waiting. */ + err = 0; + + if (got) + /* At least one descriptor is known to be ready now, so we will + return success. */ + err = 0; + } + + for (i = firstfd; i <= lastfd; ++i) + if (d[i].type) + __mach_port_destroy (__mach_task_self (), d[i].reply_port); + if ((firstfd != lastfd) && (portset != MACH_PORT_NULL)) + /* Destroy PORTSET, but only if it's not actually the reply + port for a single descriptor (in which case it's destroyed + in the previous loop; not doing it here is just a bit more + efficient). */ + __mach_port_destroy (__mach_task_self (), portset); + + if (err) + { + /* FIXME: Only for ppoll */ + if (sigmask) + __sigprocmask (SIG_SETMASK, &oset, NULL); + return __hurd_fail (err); + } + + /* Fill in the `revents' members of the user's array. */ + for (i = 0; i < nfds; ++i) + { + int type = d[i].type; + int_fast16_t revents = 0; + + if (type & SELECT_RETURNED) + { + if (type & SELECT_READ) + revents |= POLLIN; + if (type & SELECT_WRITE) + revents |= POLLOUT; + if (type & SELECT_URG) + revents |= POLLPRI; + } + pollfds[i].revents = revents; + } + + /* FIXME: Only for ppoll */ + if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL)) + return -1; + + return got; + break; + + case SELECT: /* Collect interested descriptors from the user's fd_set arguments. - Use local copies so we can't crash from user bogosity. */ + Use local copies so we can't crash from user bogosity. */ if (readfds == NULL) FD_ZERO (&rfds); @@ -174,11 +518,7 @@ _hurd_select (int nfds, HURD_CRITICAL_BEGIN; __mutex_lock (&_hurd_dtable_lock); - if (nfds > _hurd_dtablesize) - nfds = _hurd_dtablesize; - - /* Collect the ports for interesting FDs. */ - firstfd = lastfd = -1; + /* Collect the ports for interesting FDs. */ for (i = 0; i < nfds; ++i) { int type = 0; @@ -203,8 +543,6 @@ _hurd_select (int nfds, break; } lastfd = i; - if (firstfd == -1) - firstfd = i; } } @@ -213,25 +551,17 @@ _hurd_select (int nfds, if (i < nfds) { + /* FIXME: Only for pselect */ if (sigmask) __sigprocmask (SIG_SETMASK, &oset, NULL); errno = EBADF; return -1; } - } - - - err = 0; - got = 0; - - /* Send them all io_select request messages. */ + firstfd = 0; + err = 0; + got = 0; - if (firstfd == -1) - /* But not if there were no ports to deal with at all. - We are just a pure timeout. */ - portset = __mach_reply_port (); - else - { + /* Send them all io_select request messages. */ portset = MACH_PORT_NULL; for (i = firstfd; i <= lastfd; ++i) @@ -284,7 +614,7 @@ _hurd_select (int nfds, /* Bogus answer; treat like an error, as a fake positive. */ type = SELECT_ALL; - /* This port is already ready already. */ + /* This port is already ready already. */ d[i].type &= type; d[i].type |= SELECT_RETURNED; ++got; @@ -292,99 +622,51 @@ _hurd_select (int nfds, } _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); } - } - - /* Now wait for reply messages. */ - if (!err && got == 0) - { - /* Now wait for io_select_reply messages on PORT, - timing out as appropriate. */ - union + /* Now wait for reply messages. */ + if (!err && got == 0 && (firstfd != lastfd)) { - mach_msg_header_t head; -#ifdef MACH_MSG_TRAILER_MINIMUM_SIZE - struct + /* Now wait for io_select_reply messages on PORT, + timing out as appropriate. */ + while ((msgerr = __mach_msg (&msg.head, + MACH_RCV_MSG | options, + 0, sizeof msg, portset, to, + MACH_PORT_NULL)) == MACH_MSG_SUCCESS) { - mach_msg_header_t head; - NDR_record_t ndr; - error_t err; - } error; - struct - { - mach_msg_header_t head; - NDR_record_t ndr; - error_t err; - int result; - mach_msg_trailer_t trailer; - } success; -#else - struct - { - mach_msg_header_t head; - union typeword err_type; - error_t err; - } error; - struct - { - mach_msg_header_t head; - union typeword err_type; - error_t err; - union typeword result_type; - int result; - } success; -#endif - } msg; - mach_msg_option_t options = (timeout == NULL ? 0 : MACH_RCV_TIMEOUT); - error_t msgerr; - while ((msgerr = __mach_msg (&msg.head, - MACH_RCV_MSG | options, - 0, sizeof msg, portset, to, - MACH_PORT_NULL)) == MACH_MSG_SUCCESS) - { - /* We got a message. Decode it. */ -#define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ -#ifdef MACH_MSG_TYPE_BIT - const union typeword inttype = - { type: - { MACH_MSG_TYPE_INTEGER_T, sizeof (integer_t) * 8, 1, 1, 0, 0 } - }; -#endif - if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && - msg.head.msgh_size >= sizeof msg.error && - !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && + /* We got a message. Decode it. */ + if (msg.head.msgh_id == IO_SELECT_REPLY_MSGID && + msg.head.msgh_size >= sizeof msg.error && + !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) && #ifdef MACH_MSG_TYPE_BIT - msg.error.err_type.word == inttype.word + msg.error.err_type.word == inttype.word #endif - ) - { - /* This is a properly formatted message so far. - See if it is a success or a failure. */ - if (msg.error.err == EINTR && - msg.head.msgh_size == sizeof msg.error) + ) { - /* EINTR response; poll for further responses - and then return quickly. */ - err = EINTR; - goto poll; - } - if (msg.error.err || - msg.head.msgh_size != sizeof msg.success || + /* This is a properly formatted message so far. + See if it is a success or a failure. */ + if (msg.error.err == EINTR && + msg.head.msgh_size == sizeof msg.error) + { + /* EINTR response; poll for further responses + and then return quickly. */ + err = EINTR; + goto poll2; + } + if (msg.error.err || + msg.head.msgh_size != sizeof msg.success || #ifdef MACH_MSG_TYPE_BIT - msg.success.result_type.word != inttype.word || + msg.success.result_type.word != inttype.word || #endif - (msg.success.result & SELECT_ALL) == 0) - { - /* Error or bogus reply. Simulate readiness. */ - __mach_msg_destroy (&msg.head); - msg.success.result = SELECT_ALL; - } - - /* Look up the respondent's reply port and record its - readiness. */ - { - int had = got; - if (firstfd != -1) + (msg.success.result & SELECT_ALL) == 0) + { + /* Error or bogus reply. Simulate readiness. */ + __mach_msg_destroy (&msg.head); + msg.success.result = SELECT_ALL; + } + + /* Look up the respondent's reply port and record its + readiness. */ + int had = got; for (i = firstfd; i <= lastfd; ++i) if (d[i].type && d[i].reply_port == msg.head.msgh_local_port) @@ -393,106 +675,90 @@ _hurd_select (int nfds, d[i].type |= SELECT_RETURNED; ++got; } - assert (got > had); - } - } + assert (got > had); + } - if (msg.head.msgh_remote_port != MACH_PORT_NULL) - __mach_port_deallocate (__mach_task_self (), - msg.head.msgh_remote_port); + if (msg.head.msgh_remote_port != MACH_PORT_NULL) + __mach_port_deallocate (__mach_task_self (), + msg.head.msgh_remote_port); - if (got) - poll: - { - /* Poll for another message. */ - to = 0; - options |= MACH_RCV_TIMEOUT; + if (got) + poll2: + { + /* Poll for another message. */ + to = 0; + options |= MACH_RCV_TIMEOUT; + } } - } - if (err == MACH_RCV_TIMED_OUT) - /* This is the normal value for ERR. We might have timed out and - read no messages. Otherwise, after receiving the first message, - we poll for more messages. We receive with a timeout of 0 to - effect a poll, so ERR is MACH_RCV_TIMED_OUT when the poll finds no - message waiting. */ - err = 0; - - if (got) - /* At least one descriptor is known to be ready now, so we will - return success. */ - err = 0; - } + if (err == MACH_RCV_TIMED_OUT) + /* This is the normal value for ERR. We might have timed out and + read no messages. Otherwise, after receiving the first message, + we poll for more messages. We receive with a timeout of 0 to + effect a poll, so ERR is MACH_RCV_TIMED_OUT when the poll finds no + message waiting. */ + err = 0; - if (firstfd != -1) - for (i = firstfd; i <= lastfd; ++i) - if (d[i].type) - __mach_port_destroy (__mach_task_self (), d[i].reply_port); - if (firstfd == -1 || (firstfd != lastfd && portset != MACH_PORT_NULL)) - /* Destroy PORTSET, but only if it's not actually the reply port for a - single descriptor (in which case it's destroyed in the previous loop; - not doing it here is just a bit more efficient). */ - __mach_port_destroy (__mach_task_self (), portset); - - if (err) - { - if (sigmask) - __sigprocmask (SIG_SETMASK, &oset, NULL); - return __hurd_fail (err); - } - - if (pollfds) - /* Fill in the `revents' members of the user's array. */ - for (i = 0; i < nfds; ++i) - { - int type = d[i].type; - int_fast16_t revents = 0; + if (got) + /* At least one descriptor is known to be ready now, so we will + return success. */ + err = 0; + } - if (type & SELECT_RETURNED) - { - if (type & SELECT_READ) - revents |= POLLIN; - if (type & SELECT_WRITE) - revents |= POLLOUT; - if (type & SELECT_URG) - revents |= POLLPRI; - } + for (i = firstfd; i <= lastfd; ++i) + if (d[i].type) + __mach_port_destroy (__mach_task_self (), d[i].reply_port); + if ((firstfd != lastfd) && (portset != MACH_PORT_NULL)) + /* Destroy PORTSET, but only if it's not actually the reply port for a + single descriptor (in which case it's destroyed in the previous loop; + not doing it here is just a bit more efficient). */ + __mach_port_destroy (__mach_task_self (), portset); + + if (err) + { + /* FIXME: Only for pselect */ + if (sigmask) + __sigprocmask (SIG_SETMASK, &oset, NULL); + return __hurd_fail (err); + } - pollfds[i].revents = revents; - } - else - { /* Below we recalculate GOT to include an increment for each operation allowed on each fd. */ got = 0; /* Set the user bitarrays. We only ever have to clear bits, as all desired ones are initially set. */ - if (firstfd != -1) - for (i = firstfd; i <= lastfd; ++i) - { - int type = d[i].type; + for (i = firstfd; i <= lastfd; ++i) + { + int type = d[i].type; + + if ((type & SELECT_RETURNED) == 0) + type = 0; + + if (type & SELECT_READ) + got++; + else if (readfds) + FD_CLR (i, readfds); + if (type & SELECT_WRITE) + got++; + else if (writefds) + FD_CLR (i, writefds); + if (type & SELECT_URG) + got++; + else if (exceptfds) + FD_CLR (i, exceptfds); + } - if ((type & SELECT_RETURNED) == 0) - type = 0; + /* FIXME: Only for pselect */ + if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL)) + return -1; - if (type & SELECT_READ) - got++; - else if (readfds) - FD_CLR (i, readfds); - if (type & SELECT_WRITE) - got++; - else if (writefds) - FD_CLR (i, writefds); - if (type & SELECT_URG) - got++; - else if (exceptfds) - FD_CLR (i, exceptfds); - } - } + return got; + break; - if (sigmask && __sigprocmask (SIG_SETMASK, &oset, NULL)) - return -1; + default: + errno = EINVAL; + return -1; - return got; + } /* switch ispoll */ }