Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

crash (unknown cause) with test mgmt-handshake-and-volume-sync-post-glusterd-restart.t #4369

Open
mykaul opened this issue May 30, 2024 · 1 comment

Comments

@mykaul
Copy link
Contributor

mykaul commented May 30, 2024

Seen when running the regression tests (https://build.gluster.org/job/gh_centos7-regression/3404/console, part of #4363).
I don't think the crash has anything to do with the change in #4363; the test also fails consistently (without crashing) when run locally.

Backtrace:

Core was generated by `/build/install/sbin/glusterfs -s 127.1.1.3 --volfile-id shd/patchy -p /d/backen'.
Program terminated with signal 11, Segmentation fault.
#0  _gf_log (domain=0x7fc96a1bb236 "logging-infra", file=0x7fc96a1babf0 "/home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c", function=0x7fc96a1bb5d0 <__FUNCTION__.12655> "gf_log_flush_extra_msgs", line=1656, level=GF_LOG_DEBUG, fmt=0x7fc96a1bb1f8 "Log buffer size reduced. About to flush %d extra log messages") at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:2070
2070	    ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr,

Thread 9 (Thread 0x7fc95aaed700 (LWP 21173)):
#0  0x00007fc967de6ad1 in clone () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc9688eede0 in ?? () from /lib64/libpthread.so.0
No symbol table info available.
#2  0x00007fc95aaed700 in ?? ()
No symbol table info available.
#3  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 8 (Thread 0x7fc95e300700 (LWP 21128)):
#0  0x00007fc967dddb43 in select () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc96a17354c in runner (arg=0x2822000) at /home/jenkins/root/workspace/gh_centos7-regression/contrib/timer-wheel/timer-wheel.c:187
        tv = {tv_sec = 0, tv_usec = 921681}
        base = 0x2822000
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 7 (Thread 0x7fc960584700 (LWP 21124)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a0f4cef in gf_timer_proc (data=0x280e0d8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/timer.c:140
        now = {tv_sec = 4856893, tv_nsec = 792708918}
        reg = 0x280e0d8
        event = 0x27f61e8
        tmp = 0x0
        old_THIS = 0x0
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 6 (Thread 0x7fc95ba43700 (LWP 21130)):
#0  0x00007fc967de70e3 in epoll_wait () from /lib64/libc.so.6
No symbol table info available.
#1  0x00007fc96a157275 in event_dispatch_epoll_worker (data=0x2802e28) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:715
        event = {events = 4, data = {ptr = 0x400000004, fd = 4, u32 = 4, u64 = 17179869188}}
        ret = 0
        ev_data = 0x2802e28
        event_pool = 0x27d2028
        myindex = 2
        timetodie = 0
        gen = 0
        poller_death_notify = {next = 0x0, prev = 0x0}
        slot = 0x0
        tmp = 0x0
        __FUNCTION__ = "event_dispatch_epoll_worker"
#2  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#3  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 5 (Thread 0x7fc95eb01700 (LWP 21127)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a12ffed in syncenv_task (proc=0x281e3e8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:650
        env = 0x281e028
        task = 0x0
        sleep_till = {tv_sec = 1716977336, tv_nsec = 0}
        ret = 0
#2  0x00007fc96a13031b in syncenv_processor (thdata=0x281e3e8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:797
        proc = 0x281e3e8
        task = 0x0
#3  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#4  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 4 (Thread 0x7fc95f302700 (LWP 21126)):
#0  0x00007fc9688f2de2 in pthread_cond_timedwait@@GLIBC_2.3.2 () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a12ffed in syncenv_task (proc=0x281e028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:650
        env = 0x281e028
        task = 0x0
        sleep_till = {tv_sec = 1716977336, tv_nsec = 0}
        ret = 0
#2  0x00007fc96a13031b in syncenv_processor (thdata=0x281e028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/syncop.c:797
        proc = 0x281e028
        task = 0x0
#3  0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#4  0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

Thread 3 (Thread 0x7fc95fd83700 (LWP 21125)):
#0  0x00007fc9688f63c1 in sigwait () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x000000000040b4f5 in ?? ()
No symbol table info available.
#2  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 2 (Thread 0x7fc96a6005c0 (LWP 21123)):
#0  0x00007fc9688f0017 in pthread_join () from /lib64/libpthread.so.0
No symbol table info available.
#1  0x00007fc96a157551 in event_dispatch_epoll (event_pool=0x27d2028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:809
        i = 2
        t_id = 140502802642688
        pollercount = 2
        ret = 0
        ev_data = 0x2802e28
        __FUNCTION__ = "event_dispatch_epoll"
#2  0x00007fc96a117705 in gf_event_dispatch (event_pool=0x27d2028) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event.c:115
        ret = -1
        __FUNCTION__ = "gf_event_dispatch"
#3  0x00007fc96a18242c in gf_io_legacy_wait () at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io-legacy.c:35
No locals.
#4  0x00007fc96a17c970 in gf_io_main (workers=0, handlers=0x7ffd0d0ad4c0, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io.c:431
        signals = {11, 7, 4, 31, 8, 6, 18, 0}
        pool = {mutex = {__data = {__lock = 4213136, __count = 0, __owner = 218813904, __nusers = 32765, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0, __next = 0xd0ac3f0}}, __size = "\220I@\000\000\000\000\000\320\325\n\r\375\177", '\000' <repeats 18 times>, "\360\303\n\r\000\000\000", __align = 4213136}, threads = {next = 0x7fc96a3e87e0 <gf_io_engine_legacy>, prev = 0x0}}
        cfg = {name = 0x0, pool = 0x0, cpus = 0x0, setup = 0x0, main = 0x0, signals = 0x7fc95f303000, num_threads = 0, stack_size = 0, priority = 0, first_id = 0, index = 0, timeout = 0, retries = 0}
        res = 0
#5  0x00007fc96a17ccc2 in gf_io_run (name=0x0, handlers=0x7ffd0d0ad4c0, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/gf-io.c:516
        engine = 0x7fc96a3e87e0 <gf_io_engine_legacy>
        i = 0
        res = 0
        __FUNCTION__ = "gf_io_run"
#6  0x000000000040c50f in ?? ()
No symbol table info available.
#7  0x0000000000000000 in ?? ()
No symbol table info available.

Thread 1 (Thread 0x7fc95c244700 (LWP 21129)):
#0  _gf_log (domain=0x7fc96a1bb236 "logging-infra", file=0x7fc96a1babf0 "/home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c", function=0x7fc96a1bb5d0 <__FUNCTION__.12655> "gf_log_flush_extra_msgs", line=1656, level=GF_LOG_DEBUG, fmt=0x7fc96a1bb1f8 "Log buffer size reduced. About to flush %d extra log messages") at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:2070
        basename = 0x7fc96a1bac34 "logging.c"
        new_logfile = 0x0
        ap = {{gp_offset = 48, fp_offset = 48, overflow_arg_area = 0x7fc95c2413b8, reg_save_area = 0x7fc95c2412e0}}
        timestr = "2024-05-29 09:58:57.503978 +0000", '\000' <repeats 223 times>
        tv = {tv_sec = 1716976737, tv_usec = 503978}
        logline = 0x0
        msg = 0x27f6d20 "Log buffer size reduced. About to flush 5 extra log messages"
        ret = 0
        fd = -1
        this = 0x28c0428
        ctx = 0x28c0428
        __PRETTY_FUNCTION__ = "_gf_log"
        __FUNCTION__ = "_gf_log"
#1  0x00007fc96a0e58b5 in gf_log_flush_extra_msgs (log=0x27842e0, new=0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:1653
        count = 5
        i = 5
        iter = 0x27842f0
        tmp = 0x27842f0
        copy = {next = 0x27d0128, prev = 0x27d0628}
        __FUNCTION__ = "gf_log_flush_extra_msgs"
#2  0x00007fc96a0e244c in gf_log_set_log_buf_size (ctx=0x2784000, buf_size=0) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:258
        old = 5
#3  0x00007fc96a0e2896 in gf_log_disable_suppression_before_exit (ctx=0x2784000) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/logging.c:402
No locals.
#4  0x00007fc96a0eca33 in gf_print_trace (signum=11, ctx=0x2784000) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/common-utils.c:650
        msg = '\000' <repeats 1023 times>
        timestr = '\000' <repeats 255 times>
        stack = 0x0
#5  0x000000000040b594 in ?? ()
No symbol table info available.
#6  0x0000000002782000 in ?? ()
No symbol table info available.
#7  0x0000000b67d30881 in ?? ()
No symbol table info available.
#8  0x00007fc95c242060 in ?? ()
No symbol table info available.
#9  <signal handler called>
No symbol table info available.
#10 0x0000000cdeadc0de in ?? ()
No symbol table info available.
#11 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28c0428, event=6, data=0x28bf228) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28bf228
        ret = 0
#12 0x00007fc96a1b8da0 in default_notify (this=0x28bf228, event=6, data=0x28b6828) at defaults.c:3387
        parent = 0x2b02528
        ret = 0
        victim = 0x28b6828
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#13 0x00007fc95ab15d03 in notify (this=0x28bf228, event=6, data=0x28b6828) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/debug/io-stats/src/io-stats.c:4332
        ret = 0
        args = {type = IOS_DUMP_TYPE_NONE, u = {logfp = 0x0, dict = 0x0}}
        output = 0x7fc95ab154de <notify>
        dict = 0x28b6828
        op = 0
        list_cnt = 0
        throughput = 0
        time = 0
        is_peek = false
        ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0x7fc95c242330, reg_save_area = 0x7fc95c242270}}
        up_data = 0x0
        up_ci = 0x0
        __FUNCTION__ = "notify"
#14 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28bf228, event=6, data=0x28b6828) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28b6828
        ret = 0
#15 0x00007fc96a1b8da0 in default_notify (this=0x28b6828, event=6, data=0x28b3228) at defaults.c:3387
        parent = 0x2803928
        ret = 0
        victim = 0x28b3228
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#16 0x00007fc95adb988a in afr_notify (this=0x28b6828, event=6, data=0x28b3228, data2=0x7fc95adbea0e <notify>) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/cluster/afr/src/afr-common.c:6424
        priv = 0x27cce28
        child_xlator = 0x28b3228
        i = 2
        propagate = 1
        had_heard_from_all = 0
        have_heard_from_all = 1
        idx = 1
        ret = 0
        call_psh = 0
        up_child = -1
        input = 0x0
        output = 0x0
        had_quorum = false
        has_quorum = false
        halo_max_latency_msec = 0
        child_latency_msec = -1
        __FUNCTION__ = "afr_notify"
#17 0x00007fc95adbeb18 in notify (this=0x28b6828, event=6, data=0x28b3228) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/cluster/afr/src/afr.c:42
        ret = -1
        ap = {{gp_offset = 32, fp_offset = 48, overflow_arg_area = 0x7fc95c242730, reg_save_area = 0x7fc95c242670}}
        data2 = 0x7fc95adbea0e <notify>
#18 0x00007fc96a0dfbe1 in xlator_notify (xl=0x28b6828, event=6, data=0x28b3228) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/xlator.c:714
        old_THIS = 0x28b3228
        ret = 0
#19 0x00007fc96a1b8da0 in default_notify (this=0x28b3228, event=6, data=0x2858848) at defaults.c:3387
        parent = 0x2803828
        ret = 0
        victim = 0x2858848
        graph = 0x28144a8
        __FUNCTION__ = "default_notify"
#20 0x00007fc95afee5d6 in client_notify_dispatch (this=0x28b3228, event=6, data=0x2858848) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:146
        ret = -1
        ctx = 0x2784000
        conf = 0x27ce9c8
#21 0x00007fc95afee4a9 in client_notify_dispatch_uniq (this=0x28b3228, event=6, data=0x2858848) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:118
        conf = 0x27ce9c8
        ctx = 0x2784000
        graph = 0x28144a8
#22 0x00007fc95affd048 in client_rpc_notify (rpc=0x2858848, mydata=0x28b3228, event=RPC_CLNT_DISCONNECT, data=0x0) at /home/jenkins/root/workspace/gh_centos7-regression/xlators/protocol/client/src/client.c:2275
        this = 0x28b3228
        conf = 0x27ce9c8
        is_parent_down = false
        ret = 0
        __FUNCTION__ = "client_rpc_notify"
#23 0x00007fc969e8596c in rpc_clnt_handle_disconnect (clnt=0x2858848, conn=0x2858878) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-clnt.c:785
        ts = {tv_sec = 0, tv_nsec = 0}
        unref_clnt = false
        pre_notify_gen = 0
        post_notify_gen = 0
        __FUNCTION__ = "rpc_clnt_handle_disconnect"
#24 0x00007fc969e85c2b in rpc_clnt_notify (trans=0x2851ba8, mydata=0x2858878, event=RPC_TRANSPORT_DISCONNECT, data=0x2851ba8) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-clnt.c:846
        conn = 0x2858878
        clnt = 0x2858848
        ret = -1
        req_info = 0x0
        pollin = 0x0
        clnt_mydata = 0x0
        old_THIS = 0x28b3228
        __FUNCTION__ = "rpc_clnt_notify"
#25 0x00007fc969e822ba in rpc_transport_notify (this=0x2851ba8, event=RPC_TRANSPORT_DISCONNECT, data=0x2851ba8) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-lib/src/rpc-transport.c:504
        ret = -1
        __FUNCTION__ = "rpc_transport_notify"
#26 0x00007fc95d8e422c in socket_event_poll_err (this=0x2851ba8, gen=1, idx=3) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-transport/socket/src/socket.c:1366
        priv = 0x27836a8
        socket_closed = true
#27 0x00007fc95d8e9124 in socket_event_handler (fd=12, idx=3, gen=1, data=0x2851ba8, poll_in=1, poll_out=0, poll_err=16, event_thread_died=0) at /home/jenkins/root/workspace/gh_centos7-regression/rpc/rpc-transport/socket/src/socket.c:2821
        sa = 0x2851c68
        this = 0x2851ba8
        priv = 0x27836a8
        ret = -1
        ctx = 0x2784000
        socket_closed = false
        notify_handled = false
        __FUNCTION__ = "socket_event_handler"
#28 0x00007fc96a156d94 in event_dispatch_epoll_handler (event_pool=0x27d2028, event=0x7fc95c243050) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:614
        ev_data = 0x7fc95c243054
        slot = 0x27d6238
        handler = 0x7fc95d8e8ac8 <socket_event_handler>
        data = 0x2851ba8
        idx = 3
        gen = 1
        ret = 0
        fd = 12
        handled_error_previously = false
        __FUNCTION__ = "event_dispatch_epoll_handler"
#29 0x00007fc96a1572aa in event_dispatch_epoll_worker (data=0x2802de8) at /home/jenkins/root/workspace/gh_centos7-regression/libglusterfs/src/event-epoll.c:725
        event = {events = 17, data = {ptr = 0x100000003, fd = 3, u32 = 3, u64 = 4294967299}}
        ret = 1
        ev_data = 0x2802de8
        event_pool = 0x27d2028
        myindex = 1
        timetodie = 0
        gen = 0
        poller_death_notify = {next = 0x0, prev = 0x0}
        slot = 0x0
        tmp = 0x0
        __FUNCTION__ = "event_dispatch_epoll_worker"
#30 0x00007fc9688eeea5 in start_thread () from /lib64/libpthread.so.0
No symbol table info available.
#31 0x00007fc967de6b0d in clone () from /lib64/libc.so.6
No symbol table info available.

mgmt-handshake-and-volume-sync-post-glusterd-restart-iteration-1.tar.gz

@mykaul
Copy link
Contributor Author

mykaul commented May 30, 2024

Perhaps related to #4123?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant