From a4079cf1b53d7b47d6b20df44f60da784da228a2 Mon Sep 17 00:00:00 2001 From: Rainer Gerhards Date: Thu, 15 Nov 2018 18:36:20 +0100 Subject: [PATCH] testbench: add test for receiver abort and restart Also - much improved testbench framework and tools to support this type of test. Done some general improvements to the tooling. - imported rsyslog tool "chkseq" for checking - added more error messages to code still not fully done and probably needs some review. but even for testbench runs the current set of error messages was too small to be useful. see also https://github.com/rsyslog/librelp/issues/130 --- configure.ac | 7 +- src/copen.c | 1 - src/relpsess.c | 35 +++--- src/scrsp.c | 5 + src/tcp.c | 6 +- tests/Makefile.am | 8 +- tests/basic-realistic.sh | 2 +- tests/chkseq.c | 249 +++++++++++++++++++++++++++++++++++++++ tests/dummyserver.py | 22 ++++ tests/receive.c | 67 ++++++++--- tests/receiver-abort.sh | 30 +++++ tests/send.c | 87 +++++++++++--- tests/test-framework.sh | 48 ++++++-- 13 files changed, 507 insertions(+), 60 deletions(-) create mode 100644 tests/chkseq.c create mode 100755 tests/dummyserver.py create mode 100755 tests/receiver-abort.sh diff --git a/configure.ac b/configure.ac index efc20c91..0040aacc 100644 --- a/configure.ac +++ b/configure.ac @@ -3,8 +3,11 @@ AC_PREREQ(2.61) AC_INIT([librelp], [1.2.19.master], [rgerhards@adiscon.com]) -AM_INIT_AUTOMAKE -AM_INIT_AUTOMAKE + +# change to the one below if Travis has a timeout +#AM_INIT_AUTOMAKE([subdir-objects serial-tests]) +AM_INIT_AUTOMAKE([subdir-objects]) + m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])]) AC_CONFIG_SRCDIR([src/relp.c]) AC_CONFIG_HEADER([config.h]) diff --git a/src/copen.c b/src/copen.c index 46de7433..614640b4 100644 --- a/src/copen.c +++ b/src/copen.c @@ -143,7 +143,6 @@ BEGINcommand(S, Init) relpSessSendResponse(pSess, pFrame->txnr, replymsg, sizeof(replymsg) - 1); ABORT_FINALIZE(RELP_RET_SESSION_OPEN); } - CHKRet(relpOffersConstructFromFrame(&pCltOffers, pFrame)); CHKRet(selectOffers(pSess, pCltOffers, &pSrvOffers)); diff --git a/src/relpsess.c b/src/relpsess.c index 62c24315..ad28a359 100644 --- a/src/relpsess.c +++ b/src/relpsess.c @@ -51,6 +51,7 @@ #include "relp.h" #include "relpsess.h" #include "relpframe.h" +#include "relpclt.h" #include "sendq.h" #include "offers.h" #include "dbllinklist.h" @@ -67,25 +68,24 @@ callOnErr(const relpSess_t *__restrict__ const pThis, const relpRetVal ecode) { char objinfo[1024]; + relpTcp_t *const pTcp = pThis->pTcp; //pThis->pEngine->dbgprint("librelp: generic error: ecode %d, " //"emsg '%s'\n", ecode, emsg); if(pThis->pEngine->onErr != NULL) { -#if 0 // TODO: FIXME - if(pThis->pSrv == NULL) { /* client */ + if(pTcp->pSrv == NULL) { /* client */ snprintf(objinfo, sizeof(objinfo), "conn to srvr %s:%s", - pThis->pClt->pSess->srvAddr, - pThis->pClt->pSess->srvPort); - } else if(pThis->pRemHostIP == NULL) { /* server listener */ + pTcp->pClt->pSess->srvAddr, + pTcp->pClt->pSess->srvPort); + } else if(pTcp->pRemHostIP == NULL) { /* server listener */ snprintf(objinfo, sizeof(objinfo), "lstn %s", - pThis->pSrv->pLstnPort); + pTcp->pSrv->pLstnPort); } else { /* server connection to client */ snprintf(objinfo, sizeof(objinfo), "lstn %s: conn to clt %s/%s", - pThis->pSrv->pLstnPort, pThis->pRemHostIP, - pThis->pRemHostName); + pTcp->pSrv->pLstnPort, pTcp->pRemHostIP, + pTcp->pRemHostName); } objinfo[sizeof(objinfo)-1] = '\0'; -#endif - pThis->pEngine->onErr(pThis->pUsr, "session", emsg, ecode); + pThis->pEngine->onErr(pThis->pUsr, objinfo, emsg, ecode); } } @@ -276,8 +276,9 @@ relpSessRcvData(relpSess_t *pThis) */ pThis->sessState = eRelpSessState_BROKEN; ABORT_FINALIZE(RELP_RET_SESSION_BROKEN); - } else if ((int) lenBuf == -1) { /* I don't know why we need to cast to int, but we must... */ + } else if ((int) lenBuf == -1) { if(errno != EAGAIN) { + callOnErr(pThis, "error when receiving data, session broken", RELP_RET_SESSION_BROKEN); pThis->pEngine->dbgprint("errno %d during relp session %p, session broken\n", errno, (void*)pThis); pThis->sessState = eRelpSessState_BROKEN; @@ -318,6 +319,7 @@ relpSessSendResponse(relpSess_t *pThis, relpTxnr_t txnr, unsigned char *pData, s finalize_it: if(iRet != RELP_RET_OK) { if(iRet == RELP_RET_IO_ERR) { + callOnErr(pThis, "io error, session broken", RELP_RET_SESSION_BROKEN); pThis->pEngine->dbgprint("relp session %p is broken, io error\n", (void*)pThis); pThis->sessState = eRelpSessState_BROKEN; } @@ -565,7 +567,7 @@ relpSessWaitState(relpSess_t *const pThis, const relpSessState_t stateExpected, pfd.fd = sock; pfd.events = POLLIN; pThis->pEngine->dbgprint("relpSessWaitRsp waiting for data on " - "fd %d, timeout %d\n", sock, timeout); + "fd %d, timeout %d, state expected %d\n", sock, timeout, stateExpected); nfds = poll(&pfd, 1, timeout*1000); if(nfds == -1) { if(errno == EINTR) { @@ -597,6 +599,8 @@ relpSessWaitState(relpSess_t *const pThis, const relpSessState_t stateExpected, iRet == RELP_RET_SESSION_BROKEN || relpEngineShouldStop(pThis->pEngine)) { /* the session is broken! */ + callOnErr(pThis, "error waiting on required session state, session broken", + RELP_RET_SESSION_BROKEN); pThis->sessState = eRelpSessState_BROKEN; } @@ -628,6 +632,7 @@ relpSessRawSendCommand(relpSess_t *pThis, unsigned char *pCmd, size_t lenCmd, if(iRet == RELP_RET_IO_ERR) { pThis->pEngine->dbgprint("relp session %p flagged as broken, IO error\n", (void*)pThis); + callOnErr(pThis, "io error in RawSendCommand, session broken", RELP_RET_SESSION_BROKEN); pThis->sessState = eRelpSessState_BROKEN; ABORT_FINALIZE(RELP_RET_SESSION_BROKEN); } @@ -791,6 +796,7 @@ relpSessCBrspOpen(relpSess_t *pThis, relpFrame_t *pFrame) pEngine->dbgprint("ignoring unknown server offer '%s'\n", pOffer->szName); } } + relpSessSetSessState(pThis, eRelpSessState_INIT_RSP_RCVD); finalize_it: @@ -816,8 +822,10 @@ relpSessCltConnChkOffers(relpSess_t *pThis) ABORT_FINALIZE(RELP_RET_RQD_FEAT_MISSING); finalize_it: - if(iRet != RELP_RET_OK) + if(iRet != RELP_RET_OK) { + callOnErr(pThis, "error in CltConnChkOffers, session broken", RELP_RET_SESSION_BROKEN); pThis->sessState = eRelpSessState_BROKEN; + } LEAVE_RELPFUNC; } @@ -876,7 +884,6 @@ relpSessConnect(relpSess_t *pThis, int protFamily, unsigned char *port, unsigned CHKRet(relpOffersToString(pOffers, NULL, 0, &pszOffers, &lenOffers)); CHKRet(relpOffersDestruct(&pOffers)); -pThis->pEngine->dbgprint("sending open command\n"); CHKRet(relpSessRawSendCommand(pThis, (unsigned char*)"open", 4, pszOffers, lenOffers, relpSessCBrspOpen)); relpSessSetSessState(pThis, eRelpSessState_INIT_CMD_SENT); diff --git a/src/scrsp.c b/src/scrsp.c index 811a0715..51976f24 100644 --- a/src/scrsp.c +++ b/src/scrsp.c @@ -31,6 +31,7 @@ * development. */ #include "config.h" +#include #include #include #include @@ -106,6 +107,10 @@ BEGINcommand(S, Rsp) } CHKRet(relpSendbufDestruct(&pSendbuf)); } else { + if(pSess->pEngine->onErr != NULL) { + pSess->pEngine->onErr(pSess->pUsr, "rsp command", + "peer sent error response", RELP_RET_RSP_STATE_ERR); + } iRet = RELP_RET_RSP_STATE_ERR; /* TODO: add a hock to a logger function of the caller */ relpSendbufDestruct(&pSendbuf); /* don't set error code */ diff --git a/src/tcp.c b/src/tcp.c index 0ea92ea8..dd6b4f2f 100644 --- a/src/tcp.c +++ b/src/tcp.c @@ -658,14 +658,17 @@ relpTcpDestruct(relpTcp_t **ppThis) RELPOBJ_assert(pThis, Tcp); if(pThis->sock != -1) { + shutdown(pThis->sock, SHUT_RDWR); close(pThis->sock); pThis->sock = -1; } if(pThis->socks != NULL) { /* if we have some sockets at this stage, we need to close them */ - for(i = 1 ; i <= pThis->socks[0] ; ++i) + for(i = 1 ; i <= pThis->socks[0] ; ++i) { + shutdown(pThis->socks[i], SHUT_RDWR); close(pThis->socks[i]); + } free(pThis->socks); } @@ -2860,6 +2863,7 @@ relpTcpSend(relpTcp_t *const pThis, relpOctet_t *pBuf, ssize_t *pLenBuf) #endif /* defined(ENABLE_TLS) | defined(ENABLE_TLS_OPENSSL */ written = send(pThis->sock, pBuf, *pLenBuf, 0); const int errno_save = errno; + pThis->pEngine->dbgprint("relpTcpSend: send data: %.*s\n", (int) *pLenBuf, pBuf); pThis->pEngine->dbgprint("relpTcpSend: sock %d, lenbuf %zd, send returned %d [errno %d]\n", (int)pThis->sock, *pLenBuf, (int) written, errno_save); if(written == -1) { diff --git a/tests/Makefile.am b/tests/Makefile.am index 5e14ac91..00c76012 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -1,21 +1,22 @@ -check_PROGRAMS=receive send +check_PROGRAMS=receive send chkseq receive_SOURCES=receive.c -#receive_LDADD=-lrelp receive_LDADD=../src/.libs/librelp.la receive_CFLAGS=$(AM_CFLAGS) -I${top_srcdir}/src $(WARN_CFLAGS) send_SOURCES=send.c -#send_LDADD=-lrelp send_LDADD=../src/.libs/librelp.la send_CFLAGS=$(AM_CFLAGS) -I${top_srcdir}/src $(WARN_CFLAGS) +chkseq_SOURCES=chkseq.c + VALGRIND_TESTS= \ tls-basic-vg.sh \ duplicate-receiver-vg.sh TESTS= basic.sh \ basic-realistic.sh \ + receiver-abort.sh \ tls-basic.sh \ tls-basic-anon.sh \ tls-basic-certvalid.sh \ @@ -46,6 +47,7 @@ endif EXTRA_DIST=$(TESTS) \ $(VALGRIND_TESTS) \ dummyclient.py \ + dummyserver.py \ test-framework.sh \ receive.c \ send.c \ diff --git a/tests/basic-realistic.sh b/tests/basic-realistic.sh index 4e2364fc..54d3be13 100755 --- a/tests/basic-realistic.sh +++ b/tests/basic-realistic.sh @@ -4,7 +4,7 @@ # of messages . ${srcdir:=$(pwd)}/test-framework.sh NUMMESSAGES=50000 -startup_receiver $OPT_VERBOSE +startup_receiver ./send -t 127.0.0.1 -p $TESTPORT -n$NUMMESSAGES $OPT_VERBOSE stop_receiver check_msg_count diff --git a/tests/chkseq.c b/tests/chkseq.c new file mode 100644 index 00000000..b5ce0bf8 --- /dev/null +++ b/tests/chkseq.c @@ -0,0 +1,249 @@ +/* Checks if a file consists of line of strictly monotonically + * increasing numbers. An expected start and end number may + * be set. + * + * Params + * -f file to process, if not given stdin is processed. + * -s -e + * default for s is 0. -e should be given (else it is also 0) + * -d may be specified, in which case duplicate messages are permitted. + * -m number of messages permitted to be missing without triggering a + * failure. This is necessary for some failover tests, where it is + * impossible to totally guard against messagt loss. By default, NO + * message is permitted to be lost. + * -T anticipate truncation (which means specified payload length may be + * more than actual payload (which may have been truncated) + * -i increment between messages (default: 1). Can be used for tests which + * intentionally leave consistent gaps in the message numbering. + * + * Part of the testbench for rsyslog. + * + * Copyright 2009-2018 Rainer Gerhards and Adiscon GmbH. + * + * This file is part of rsyslog. + * + * Rsyslog is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Rsyslog is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Rsyslog. If not, see . + * + * A copy of the GPL can be found in the file "COPYING" in this distribution. + */ +#include "config.h" +#include +#include +#include +#if defined(_AIX) + #include +#else + #include +#endif + +int main(int argc, char *argv[]) +{ + FILE *fp; + int val; + int i; + int ret = 0; + int scanfOK; + int verbose = 0; + int bHaveExtraData = 0; + int bAnticipateTruncation = 0; + int dupsPermitted = 0; + int start = 0, end = 0; + int opt; + int lostok = 0; /* how many messages are OK to be lost? */ + int nDups = 0; + int increment = 1; + int reachedEOF; + int edLen; /* length of extra data */ + static char edBuf[500*1024]; /* buffer for extra data (pretty large to be on the save side...) */ + static char ioBuf[sizeof(edBuf)+1024]; + char *file = NULL; + + while((opt = getopt(argc, argv, "i:e:f:ds:vm:ET")) != EOF) { + switch((char)opt) { + case 'f': + file = optarg; + break; + case 'd': + dupsPermitted = 1; + break; + case 'i': + increment = atoi(optarg); + break; + case 'e': + end = atoi(optarg); + break; + case 's': + start = atoi(optarg); + break; + case 'v': + ++verbose; + break; + case 'm': + lostok = atoi(optarg); + break; + case 'E': + bHaveExtraData = 1; + break; + case 'T': + bAnticipateTruncation = 1; + break; + default:printf("Invalid call of chkseq, optchar='%c'\n", opt); + printf("Usage: chkseq file -sstart -eend -d -E\n"); + exit(1); + } + } + + if(start > end) { + printf("start must be less than or equal end!\n"); + exit(1); + } + + if(verbose) { + printf("chkseq: start %d, end %d\n", start, end); + } + + /* read file */ + if(file == NULL) { + fp = stdin; + } else { + fp = fopen(file, "r"); + } + if(fp == NULL) { + printf("error opening file '%s'\n", file); + perror(file); + exit(1); + } + + for(i = start ; i < end+1 ; i += increment) { + if(bHaveExtraData) { + if(fgets(ioBuf, sizeof(ioBuf), fp) == NULL) { + scanfOK = 0; + } else { + scanfOK = sscanf(ioBuf, "%d,%d,%s\n", &val, &edLen, edBuf) == 3 ? 1 : 0; + } + if(edLen != (int) strlen(edBuf)) { + if (bAnticipateTruncation == 1) { + if (edLen < (int) strlen(edBuf)) { + printf("extra data length specified %d, but actually is %ld in" + " record %d (truncation was anticipated, but payload should" + " have been smaller than data-length, not larger)\n", + edLen, (long) strlen(edBuf), i); + exit(1); + } + } else { + printf("extra data length specified %d, but actually is %ld in record %d\n", + edLen, (long) strlen(edBuf), i); + exit(1); + } + } + } else { + if(fgets(ioBuf, sizeof(ioBuf), fp) == NULL) { + scanfOK = 0; + } else { + scanfOK = sscanf(ioBuf, "%d\n", &val) == 1 ? 1 : 0; + } + } + if(!scanfOK) { + printf("scanf error in index i=%d\n", i); + exit(1); + } + while(val > i && lostok > 0) { + --lostok; + printf("message %d missing (ok due to -m [now %d])\n", i, lostok); + ++i; + } + if(val != i) { + if(val == i - 1 && dupsPermitted) { + --i; + ++nDups; + } else { + printf("read value %d, but expected value %d\n", val, i); + exit(1); + } + } + } + + if(i - 1 != end) { + printf("only %d records in file, expected %d\n", i - 1, end); + exit(1); + } + + int c = getc(fp); + if(c == EOF) { + reachedEOF = 1; + } else { + ungetc(c, fp); + /* if duplicates are permitted, we need to do a final check if we have duplicates at the + * end of file. + */ + if(dupsPermitted) { + i = end; + while(!feof(fp)) { + if(bHaveExtraData) { + if(fgets(ioBuf, sizeof(ioBuf), fp) == NULL) { + scanfOK = 0; + } else { + scanfOK = sscanf(ioBuf, "%d,%d,%s\n", &val, + &edLen, edBuf) == 3 ? 1 : 0; + } + if(edLen != (int) strlen(edBuf)) { + if (bAnticipateTruncation == 1) { + if (edLen < (int) strlen(edBuf)) { + printf("extra data length specified %d, but " + "actually is %ld in record %d (truncation was" + " anticipated, but payload should have been " + "smaller than data-length, not larger)\n", + edLen, (long) strlen(edBuf), i); + exit(1); + } + } else { + printf("extra data length specified %d, but actually " + "is %ld in record %d\n", + edLen, (long) strlen(edBuf), i); + exit(1); + } + } + } else { + if(fgets(ioBuf, sizeof(ioBuf), fp) == NULL) { + scanfOK = 0; + } else { + scanfOK = sscanf(ioBuf, "%d\n", &val) == 1 ? 1 : 0; + } + } + + if(val != i) { + reachedEOF = 0; + goto breakIF; + } + } + reachedEOF = feof(fp) ? 1 : 0; + } else { + reachedEOF = 0; + } + } + +breakIF: + if(nDups != 0) + printf("info: had %d duplicates (this is no error)\n", nDups); + + if(!reachedEOF) { + printf("end of processing, but NOT end of file! First line of extra data is:\n"); + for(c = fgetc(fp) ; c != '\n' && c != EOF ; c = fgetc(fp)) + putchar(c); + putchar('\n'); + exit(1); + } + + exit(ret); +} diff --git a/tests/dummyserver.py b/tests/dummyserver.py new file mode 100755 index 00000000..de9866de --- /dev/null +++ b/tests/dummyserver.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# a very simple "dummy" server for testing purposes +# written 2018-11-19 by Rainer Gerhards, released under ASL 2.0 +import socket +import sys + +print "DUMMYSERVER starting" +port = int(sys.argv[1]) +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) +sock.bind(("127.0.0.1", port)) +sock.listen(0) +print "DUMMYSERVER: listening on port", port + +conn, addr = sock.accept() +data = conn.recv(1024) +conn.shutdown(socket.SHUT_RDWR) +sock.shutdown(socket.SHUT_RDWR) +conn.close() +sock.close() + +print "DUMMYSERVER: received data:", data diff --git a/tests/receive.c b/tests/receive.c index 9da2a7fc..b3140698 100644 --- a/tests/receive.c +++ b/tests/receive.c @@ -31,10 +31,13 @@ #include #include "librelp.h" -#define TRY(f) if(f != RELP_RET_OK) { fprintf(stderr, "receive.c: FAILURE in '%s'\n", #f); ret = 1; goto done; } +#define TRY(f) if(f != RELP_RET_OK) { fprintf(stderr, "receive: FAILURE in '%s'\n", #f); ret = 1; goto done; } static FILE *errFile = NULL; static FILE *outFile = NULL; +static char *pidFileName = NULL; + +static int immediate_exit = 0; /* if set to 1, force-exit as soon as possible */ static relpEngine_t *pRelpEngine; @@ -60,6 +63,15 @@ terminate(__attribute__((unused)) const int sig) relpEngineSetStop(pRelpEngine); } +/* handler to unconditionally exit the code - required for test where + * server must "suddenly" abort. + */ +void +do_exit(__attribute__((unused)) const int sig) +{ + immediate_exit = 1; +} + static void __attribute__((format(printf, 1, 2))) dbgprintf(char *fmt, ...) @@ -70,7 +82,7 @@ dbgprintf(char *fmt, ...) va_start(ap, fmt); vsnprintf(pszWriteBuf, sizeof(pszWriteBuf), fmt, ap); va_end(ap); - fprintf(stderr, "receive.c: %s", pszWriteBuf); + fprintf(stderr, "receive: %s", pszWriteBuf); fflush(stderr); } @@ -86,9 +98,15 @@ static relpRetVal onSyslogRcv(unsigned char *pHostname __attribute__((unused)), memcpy(pMsg, msg, lenMsg); fprintf(outFile, "%s\n", pMsg); + fflush(outFile); free(pMsg); + if(immediate_exit) { + fprintf(stderr, "receive: force-exit %lld by user request\n", (long long) getpid()); + exit(1); + } + return RELP_RET_OK; } @@ -146,6 +164,10 @@ exit_hdlr(void) if(outFile != NULL) { fclose(outFile); } + if(pidFileName != NULL) { + unlink(pidFileName); + } + } int main(int argc, char *argv[]) { @@ -154,7 +176,6 @@ int main(int argc, char *argv[]) { int option_index = 0; unsigned char *port = NULL; int verbose = 0; - char *pidFileName = NULL; int protFamily = 2; /* IPv4=2, IPv6=10 */ relpSrv_t *pRelpSrv; int bEnableTLS = 0; @@ -166,6 +187,8 @@ int main(int argc, char *argv[]) { int maxDataSize = 0; int oversizeMode = 0; int ret = 0; + int append_outfile = 0; + const char* outfile_name = NULL; static struct option long_options[] = { @@ -177,19 +200,23 @@ int main(int argc, char *argv[]) { {"pidfile", required_argument, 0, 'F'}, {"errorfile", required_argument, 0, 'e'}, {"outfile", required_argument, 0, 'O'}, + {"append-outfile", no_argument, 0, 'A'}, {0, 0, 0, 0} }; - while((c = getopt_long(argc, argv, "a:e:F:m:o:O:P:p:Tvx:y:z:", long_options, &option_index)) != -1) { + while((c = getopt_long(argc, argv, "a:Ae:F:m:o:O:P:p:Tvx:y:z:", long_options, &option_index)) != -1) { switch(c) { case 'a': authMode = optarg; break; + case 'A': + append_outfile = 1; + break; case 'e': if((errFile = fopen((char*) optarg, "w")) == NULL) { perror(optarg); - fprintf(stderr, "error opening error file\n"); + fprintf(stderr, "receive: error opening error file\n"); exit(1); } break; @@ -214,11 +241,7 @@ int main(int argc, char *argv[]) { } break; case 'O': /* output file */ - if((outFile = fopen(optarg, "w")) == NULL) { - perror(optarg); - fprintf(stderr, "error opening output file\n"); - exit(1); - } + outfile_name = optarg; break; case 'o': /* oversize mode */ if(strcmp("truncate", optarg) == 0) { @@ -288,6 +311,15 @@ int main(int argc, char *argv[]) { } hdlr_enable(SIGTERM, terminate); + hdlr_enable(SIGUSR1, do_exit); + + if(outfile_name != NULL) { + if((outFile = fopen(outfile_name, append_outfile ? "a" : "w")) == NULL) { + perror(outfile_name); + fprintf(stderr, "receive: error opening output file\n"); + exit(1); + } + } TRY(relpEngineConstruct(&pRelpEngine)); TRY(relpEngineSetDbgprint(pRelpEngine, verbose ? dbgprintf : NULL)); @@ -350,14 +382,19 @@ int main(int argc, char *argv[]) { fclose(fp); } - TRY(relpEngineRun(pRelpEngine)); /* Abort via SIGHUP */ + int i = 0; + while(relpEngineRun(pRelpEngine) != RELP_RET_OK) { + fprintf(stderr, "receive: error starting relp engine, try %d\n", i); + ++i; + if(i >= 10) { + fprintf(stderr, "receive: giving up starting relp engine\n"); + break; + } + sleep(1); + } TRY(relpEngineDestruct(&pRelpEngine)); - if(pidFileName != NULL) { - unlink(pidFileName); - } - done: return ret; } diff --git a/tests/receiver-abort.sh b/tests/receiver-abort.sh new file mode 100755 index 00000000..37bf997f --- /dev/null +++ b/tests/receiver-abort.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# added 2018-11-15 by Rainer Gerhards, released under ASL 2.0 +# check that receiver abort is handled gracefully +# of messages +. ${srcdir:=$(pwd)}/test-framework.sh +NUMMESSAGES=100000 +startup_receiver +./send -t 127.0.0.1 -p $TESTPORT -n$NUMMESSAGES --kill-on-msg 20000 --kill-pid $RECEIVE_PID $OPT_VERBOSE & +SENDER_PID=$! +echo pre loop + +for i in {1..3}; do + echo in loop $i + sleep 2 + echo loop done sleep $i + set -x; set -v + ${srcdir}/dummyserver.py $TESTPORT + echo loop done exit dummyserver $i +done + +echo done loop +sleep 2 # make sure client goes into retry + +echo new receiver startop +startup_receiver --append-outfile +wait $SENDER_PID +stop_receiver + +check_msg_count +terminate diff --git a/tests/send.c b/tests/send.c index d8ccfd55..a67101b8 100644 --- a/tests/send.c +++ b/tests/send.c @@ -26,17 +26,23 @@ #include #include #include +#include +#include #include "librelp.h" -#define TRY(f) if(f != RELP_RET_OK) { fprintf(stderr, "send.c: FAILURE in '%s'\n", #f); ret = 1; goto done; } +#define TRY(f) if(f != RELP_RET_OK) { fprintf(stderr, "send: FAILURE in '%s'\n", #f); ret = 1; goto done; } static const char *under_ci = NULL; /* if non-null, we run under CI, so be even more sparse with stdout */ static FILE *errFile = NULL; +static FILE *dbgFile = NULL; static relpEngine_t *pRelpEngine; static size_t msgDataLen = 0; static int num_messages = 0; static size_t lenMsg = 0; static relpClt_t *pRelpClt = NULL; +static int kill_on_msg = 0; /* 0 - do not kill, else exact message */ +static int kill_signal = SIGUSR1; /* signal to use when we kill */ +static pid_t kill_pid = 0; #define USR_MAGIC 0x1234FFee struct usrdata { /* used for testing user pointer pass-back */ @@ -55,8 +61,7 @@ dbgprintf(char *fmt, ...) va_start(ap, fmt); vsnprintf(pszWriteBuf, sizeof(pszWriteBuf), fmt, ap); va_end(ap); - fprintf(stderr, "send.c: %s", pszWriteBuf); - fflush(stderr); + fprintf(dbgFile, "send.c: %s", pszWriteBuf); } void print_usage(void) @@ -111,27 +116,58 @@ exit_hdlr(void) if(errFile != NULL) { fclose(errFile); } + if(dbgFile != NULL) { + fclose(dbgFile); + } +} + + +static void +retry_connect(void) +{ + relpRetVal ret; + int try = 0; + while(try++ < 15) { + ret = relpCltReconnect(pRelpClt); + if(ret == RELP_RET_OK) + return; + sleep(1); + } + fprintf(stderr, "send: send giving up after max retries\n"); + exit(1); } + static int send_msgs_counter(void) { int ret; char buf[10]; - if(!under_ci) { - printf("%8.8d msgs sent", 0); - } + relpRetVal r; for(int i = 1 ; i <= num_messages ; ++i) { const ssize_t len = snprintf(buf, sizeof(buf), "%8.8d", i); if(len < 0 || len >= (ssize_t) sizeof(buf)) { - fprintf(stderr, "ERROR: snprintf failed with %lld\n", (long long) len); + fprintf(stderr, "send: ERROR: snprintf failed with %lld\n", (long long) len); exit(1); } if(!under_ci && (i % 1000 == 0)) { - printf("\r%8.8d", i); - fflush(stdout); + printf("\r%8.8d msgs sent", i); + } + r = relpCltSendSyslog(pRelpClt, (unsigned char*)buf, len); + if(r != RELP_RET_OK) { + fprintf(stderr, "send: failure %d in relpCltSendSyslog, msg %d\n", r, i); + if(dbgFile != NULL) + fprintf(dbgFile, "\n\nfailure %d in relpCltSendSyslog, msg %d\n\n\n", r, i); + retry_connect(); + --i; /* we need to-resend the failed message */ + } + if(kill_on_msg == i) { + kill_on_msg = 0; /* do this only once, even in retry mode */ + fprintf(stderr, " sending signal %d to %lld\n", kill_signal, (long long) kill_pid); + if(kill(kill_pid, kill_signal) != 0) { + perror("kill process"); + } } - TRY(relpCltSendSyslog(pRelpClt, (unsigned char*)buf, len)); } printf("\r%8.8d msgs sent\n", num_messages); done: return ret; @@ -186,7 +222,12 @@ int main(int argc, char *argv[]) { int ret = 0; under_ci = getenv("UNDER_CI"); + dbgFile = stdout; + #define KILL_ON_MSG 256 + #define KILL_SIGNAL 257 + #define KILL_PID 258 + #define DBGFILE 259 static struct option long_options[] = { {"ca", required_argument, 0, 'x'}, @@ -195,7 +236,11 @@ int main(int argc, char *argv[]) { {"peer", required_argument, 0, 'P'}, {"authmode", required_argument, 0, 'a'}, {"errorfile", required_argument, 0, 'e'}, + {"debugfile", required_argument, 0, DBGFILE}, {"num-messages", required_argument, 0, 'n'}, + {"kill-on-msg", required_argument, 0, KILL_ON_MSG}, + {"kill-signal", required_argument, 0, KILL_SIGNAL}, + {"kill-pid", required_argument, 0, KILL_PID}, {0, 0, 0, 0} }; @@ -211,6 +256,13 @@ int main(int argc, char *argv[]) { exit(1); } break; + case DBGFILE: + if((dbgFile = fopen(optarg, "w")) == NULL) { + perror(optarg); + fprintf(stderr, "error opening debug file\n"); + exit(1); + } + break; case 'd': len = atoi(optarg); if(len < 128) { @@ -252,6 +304,15 @@ int main(int argc, char *argv[]) { case 'z': myPrivKeyFile = optarg; break; + case KILL_ON_MSG: + kill_on_msg = atoi(optarg); + break; + case KILL_SIGNAL: + kill_signal = atoi(optarg); + break; + case KILL_PID: + kill_pid = atoi(optarg); + break; default: print_usage(); exit(1); @@ -261,7 +322,7 @@ int main(int argc, char *argv[]) { atexit(exit_hdlr); if(msgDataLen != 0 && msgDataLen < lenMsg) { - fprintf(stderr, "send.c: message is larger than configured message size!\n"); + fprintf(stderr, "send: message is larger than configured message size!\n"); exit(1); } @@ -330,9 +391,5 @@ int main(int argc, char *argv[]) { TRY(relpEngineDestruct(&pRelpEngine)); done: - if(errFile != NULL) { - fclose(errFile); - } - return ret; } diff --git a/tests/test-framework.sh b/tests/test-framework.sh index 7960ab85..ea14456a 100644 --- a/tests/test-framework.sh +++ b/tests/test-framework.sh @@ -49,16 +49,27 @@ startup_receiver() { printf 'Receiver running\n' } -# stop receiver stop_receiver() { - pid=$(cat receive.pid 2> /dev/null) - if [ "$pid" == "" ]; then - printf 'oops - we do not find the pid file in stop_receiver\n' + if [ "$RECEIVE_PID" == "" ]; then + printf 'oops - receiver pid not found in stop_receiver\n' return fi - kill $pid &> /dev/null - wait $pid - printf 'receiver %d stopped\n' $pid + kill $RECEIVE_PID &> /dev/null + wait $RECEIVE_PID + printf 'receiver %d stopped\n' $RECEIVE_PID +} + +abort_receiver() { + if [ "$RECEIVE_PID" == "" ]; then + printf 'oops - receiver pid not found in abort_receiver\n' + return + fi +set -x; set +v + printf 'stopping receiver %d via abort method\n' $RECEIVE_PID + kill -USR1 $RECEIVE_PID # &> /dev/null + wait $RECEIVE_PID + printf 'receiver %d stopped via abort method\n' $RECEIVE_PID +set +x; set +v } # $1 is the value to check for @@ -80,6 +91,21 @@ check_output() { fi } + +# wait until $TESTPORT is no longer bound, e.g. for session closure +# TODO: evaluate if this function is really used and, if not, +# TODO: remove again - 2018-11-19 rgerhards +wait_testport_available() { + while true; do + printf 'checking NETSTAT\n' + if ! netstat -tp | grep -q $TESTPORT; then + break + fi + sleep 1 + done +} + + # $1 is the value to check for # $2 (optinal) is the file to check check_output_only() { @@ -131,6 +157,11 @@ terminate() { # Works on $OUTFILE # TODO: check sequence, so that we do not have duplicates... check_msg_count() { + printf 'We have %s lines in %s\n' $(wc -l < $OUTFILE) $OUTFILE + if ! ./chkseq -s1 -e$NUMMESSAGES -f$OUTFILE -d ; then + exit 1 + fi + return lines=$(wc -l < $OUTFILE) if [ "$lines" -ne $NUMMESSAGES ]; then printf 'FAIL: message count not correct for %s\n' $OUTFILE @@ -149,7 +180,8 @@ printf "============================================================\n" # on Solaris we still have some issues sometimes. Please keep this # informational info inside the framework until this can be totally # considered revolved - rgerhards, 2018-04-17 -if [ "$(uname)" == "SunOS" ] ; then +ps -ef|grep receive +if [ "$(uname)" == "XXSunOS" ] ; then /usr/ucb/ps awwx echo pgrep pgrep receive