Goals

* Define semantics of all routines so that others can write to the interface
* Primary design is for MPICH2, but should also be usable by SMPD, etc.
* Allow implementation of passive target OSC (asynchronous progress engine)
* Allow fault tolerant implementation of dynamic process interface

--------------------

Requirements

* sock_post_close() must be able to interrupt a posted read and/or write. What about a connect? What about a listener with accepted socks in the queue?
* Error codes must be both descriptive of the problem and allow programmatic decisions to be made.
* It would be useful if post_read/write() could handle a byte range (min/max)
* We need an interface for getting the list of IP addresses associated with a machine.
* Restrict connections and listeners to a range of ports (req #125)
* Must be reasonably thread-safe and thread supportive. For example, it must be possible to wake up a sock_wait() blocking on a sock_set. It should also be possible to post a new read or write while a thread is blocked in sock_wait().
* Do we need anything special for proxy communication? Do we even want to support it?

--------------------

Issues

* sock_get_last_os_error()? portability? thread safety?
* If we use MPI error codes, we will need to link part of MPICH2 into SMPD. This could cause initialization problems.
* MPI error codes are not descriptive enough to allow programmatic response to errors. We could extend the MPI error classes to better define internal errors. The top-level routine (or maybe ADI3 routine) would be responsible for returning a proper error class.
* MPIR_Err_init/finalize needed so the error handling code can be initialized without calling MPI_Init. Error handling code cannot depend on any MPI structures like comm_world.
* We could change the error code stuff to use constants instead of strings.
For now we can add a lookup function like MPIR_Err_check_code(mpi_errno, "**err class string") that returns true if the error code matches the error class string.
--------------------

/*
 * definitions
 */

/* Kinds of sock operation; this is the type of the op_type field of MPIDU_Sock_event_t. */
typedef enum MPIDU_Sock_op { SOCK_OP_READ, SOCK_OP_WRITE, SOCK_OP_ACCEPT, SOCK_OP_CONNECT, SOCK_OP_CLOSE } MPIDU_Sock_op_t;

/*
 * structures
 */

/*
 * Event record.  MPIDU_Sock_wait takes a pointer to one of these as its "out" parameter.
 *
 * NOTE(review): field meanings are not spelled out in this file.  Presumably op_type identifies the operation the event
 * refers to, num_bytes is a byte count for that operation, user_ptr is the pointer the caller associated with the sock,
 * and error is an error code -- confirm against the implementation.
 */
typedef struct MPIDU_Sock_event { MPIDU_Sock_op_t op_type; MPIDU_Sock_size_t num_bytes; void *user_ptr; int error; } MPIDU_Sock_event_t;

/*
 * Progress update callback functions
 *
 * If a function pointer is passed to one of the MPIDU_Sock_post_... functions the following applies:
 *
 * 1) The sock progress engine will call this function when partial data has been read or written for the posted operation.
 *
 * 2) All progress_update calls must complete before completion notification is signalled.  In other words, MPIDU_Sock_wait will
 *    not return until all progress_update calls have completed.
 */
typedef int (*MPIDU_Sock_progress_update_func_t)(MPIDU_Sock_size_t num_bytes, void *user_ptr);

/*
 * function prototypes
 *
 * Every routine returns int.  NOTE(review): presumably an error code (see the Issues section on error codes); the
 * exact convention is not fixed by this file.
 */

/* Module setup and teardown. */
int MPIDU_Sock_init(void);
int MPIDU_Sock_finalize(void);

/* Writes into the caller-supplied buffer host_description; len is presumably the buffer size -- TODO confirm. */
int MPIDU_Sock_get_host_description(int myRank, char * host_description, int len);

/* Sock set management: create_set hands back a set through its pointer argument, destroy_set takes a set by value. */
int MPIDU_Sock_create_set(MPIDU_Sock_set_t * set);
int MPIDU_Sock_destroy_set(MPIDU_Sock_set_t set);

/* Associates user_ptr with sock. */
int MPIDU_Sock_set_user_ptr(MPIDU_Sock_t sock, void * user_ptr);

/* Listener creation; port is passed by pointer (NOTE(review): presumably in/out -- confirm). */
int MPIDU_Sock_listen(MPIDU_Sock_set_t set, void * user_ptr, int * port, MPIDU_Sock_t * listener);

/*
 * Posted operations.  The read/write variants accept an optional progress update callback (see
 * MPIDU_Sock_progress_update_func_t above).  The min/max parameters of post_read and post_write correspond to the
 * byte range (min/max) mentioned in the Requirements section.
 */
int MPIDU_Sock_post_connect(MPIDU_Sock_set_t set, void * user_ptr, char * host_description, int port, MPIDU_Sock_t * connected);
int MPIDU_Sock_post_close(MPIDU_Sock_t sock);
int MPIDU_Sock_post_read(MPIDU_Sock_t sock, void * buf, MPIDU_Sock_size_t min, MPIDU_Sock_size_t max, MPIDU_Sock_progress_update_func_t fn);
int MPIDU_Sock_post_readv(MPIDU_Sock_t sock, MPID_IOV * iov, int n, MPIDU_Sock_progress_update_func_t fn);
int MPIDU_Sock_post_write(MPIDU_Sock_t sock, void * buf, MPIDU_Sock_size_t min, MPIDU_Sock_size_t max, MPIDU_Sock_progress_update_func_t fn);
int MPIDU_Sock_post_writev(MPIDU_Sock_t sock, MPID_IOV * iov, int n, MPIDU_Sock_progress_update_func_t fn);

/*
 * Waiting on a sock set.  MPIDU_Sock_wait reports through the "out" event record.  NOTE(review): MPIDU_Sock_wakeup is
 * presumably the routine that satisfies the requirement that a blocked wait on a set can be woken up -- confirm.
 */
int MPIDU_Sock_wait(MPIDU_Sock_set_t set, int millisecond_timeout, MPIDU_Sock_event_t * out);
int MPIDU_Sock_wakeup(MPIDU_Sock_set_t set);

/* Takes a listener sock and a set; the resulting sock is returned through "accepted". */
int MPIDU_Sock_accept(MPIDU_Sock_t listener, MPIDU_Sock_set_t set, void * user_ptr, MPIDU_Sock_t * accepted);

/*
 * Non-posted read/write variants.  Each reports a byte count through its num_read / num_written output pointer
 * rather than through an event.  NOTE(review): whether these block is not stated here.
 */
int MPIDU_Sock_read(MPIDU_Sock_t sock, void * buf, MPIDU_Sock_size_t len, MPIDU_Sock_size_t * num_read);
int MPIDU_Sock_readv(MPIDU_Sock_t sock, MPID_IOV * iov, int n, MPIDU_Sock_size_t * num_read);
int MPIDU_Sock_write(MPIDU_Sock_t sock, void * buf, MPIDU_Sock_size_t len, MPIDU_Sock_size_t * num_written);
int MPIDU_Sock_writev(MPIDU_Sock_t sock, MPID_IOV * iov, int n, MPIDU_Sock_size_t * num_written);

/* Takes a native descriptor (MPIDU_SOCK_NATIVE_FD) plus a set and user_ptr; the sock is returned through sock_ptr. */
int MPIDU_Sock_native_to_sock(MPIDU_Sock_set_t set, MPIDU_SOCK_NATIVE_FD fd, void *user_ptr, MPIDU_Sock_t *sock_ptr);

/* extended functions */
int MPIDU_Sock_getid(MPIDU_Sock_t sock);
int MPIDU_Sock_getsetid(MPIDU_Sock_set_t set);
int MPIDU_Sock_get_last_os_error(void);

/*
 * Comma-separated list of MPID_STATE_ identifiers, one for each of the 24 routines declared above, followed by
 * MPIDU_SOCKI_STATE_LIST, which is not defined in this file.  NOTE(review): presumably meant to be expanded inside an
 * enumeration of logging/timing states -- confirm at the point of use.
 */
#define MPIDU_SOCK_STATE_LIST \
MPID_STATE_MPIDU_SOCK_INIT, \
MPID_STATE_MPIDU_SOCK_FINALIZE, \
MPID_STATE_MPIDU_SOCK_GET_HOST_DESCRIPTION, \
MPID_STATE_MPIDU_SOCK_CREATE_SET, \
MPID_STATE_MPIDU_SOCK_DESTROY_SET, \
MPID_STATE_MPIDU_SOCK_LISTEN, \
MPID_STATE_MPIDU_SOCK_POST_CONNECT, \
MPID_STATE_MPIDU_SOCK_ACCEPT, \
MPID_STATE_MPIDU_SOCK_POST_CLOSE, \
MPID_STATE_MPIDU_SOCK_WAIT, \
MPID_STATE_MPIDU_SOCK_WAKEUP, \
MPID_STATE_MPIDU_SOCK_SET_USER_PTR, \
MPID_STATE_MPIDU_SOCK_READ, \
MPID_STATE_MPIDU_SOCK_READV, \
MPID_STATE_MPIDU_SOCK_WRITE, \
MPID_STATE_MPIDU_SOCK_WRITEV, \
MPID_STATE_MPIDU_SOCK_POST_READ, \
MPID_STATE_MPIDU_SOCK_POST_READV, \
MPID_STATE_MPIDU_SOCK_POST_WRITE, \
MPID_STATE_MPIDU_SOCK_POST_WRITEV, \
MPID_STATE_MPIDU_SOCK_NATIVE_TO_SOCK, \
MPID_STATE_MPIDU_SOCK_GETID, \
MPID_STATE_MPIDU_SOCK_GETSETID, \
MPID_STATE_MPIDU_SOCK_GET_LAST_OS_ERROR, \
MPIDU_SOCKI_STATE_LIST