Netxms server segfaulted during configuration poll

Started by cyril, September 18, 2017, 02:26:02 PM

Previous topic - Next topic

cyril

Hi. Our NetXMS server crashed because of a segmentation fault. Here is the backtrace:

Core was generated by `/usr/bin/netxmsd -d'.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  wcscmp () at ../sysdeps/x86_64/wcscmp.S:424
424     ../sysdeps/x86_64/wcscmp.S: No such file or directory.
(gdb) set print pretty on
(gdb) set height 0
(gdb) bt full
#0  wcscmp () at ../sysdeps/x86_64/wcscmp.S:424
No locals.
#1  0x00007fca7b446db1 in msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788288, n=2) at msort.c:83
        b1 = 0x11788288 "\200B\271\022"
        b2 = 0x11788290 "\300Ag\017"
        n1 = 1
        n2 = 1
        tmp = 0xfa7c210 "\240;\254\016"
        s = 8
        cmp = 0x7fca7e9c2e20 <PackageNameComparator(SoftwarePackage const**, SoftwarePackage const**)>
        arg = 0x0
#2  0x00007fca7b446b18 in msort_with_tmp (n=2, b=0x11788288, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#3  msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=3) at msort.c:54
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x11788288 "\200B\271\022"
        n1 = 1
        n2 = 2
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#4  0x00007fca7b446b02 in msort_with_tmp (n=3, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#5  msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=6) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x11788298 "\200d\376\017"
        n1 = 3
        n2 = 3
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#6  0x00007fca7b446b02 in msort_with_tmp (n=6, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#7  msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=13) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x117882b0 "P\321\r\023"
        n1 = 6
        n2 = 7
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#8  0x00007fca7b446b02 in msort_with_tmp (n=13, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#9  msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=26) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x117882e8 "@\353t\020"
        n1 = 13
        n2 = 13
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#10 0x00007fca7b446b02 in msort_with_tmp (n=26, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#11 msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=53) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x11788350 "\300\002J\024"
        n1 = 26
        n2 = 27
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#12 0x00007fca7b446b02 in msort_with_tmp (n=53, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#13 msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=107) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x11788428 "\340\201?\025"
        n1 = 53
        n2 = 54
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#14 0x00007fca7b446b02 in msort_with_tmp (n=107, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#15 msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=215) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x117885d8 "\260\277\300\020"
        n1 = 107
        n2 = 108
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#16 0x00007fca7b446b02 in msort_with_tmp (n=215, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#17 msort_with_tmp (p=0x7fc9c31b3d20, b=0x11788280, n=430) at msort.c:53
        b1 = 0x11788280 "\220\242\276\021"
        b2 = 0x11788938 "\200\227\337\024"
        n1 = 215
        n2 = 215
        tmp = <optimized out>
        s = <optimized out>
        cmp = <optimized out>
        arg = <optimized out>
#18 0x00007fca7b44705c in msort_with_tmp (n=430, b=0x11788280, p=0x7fc9c31b3d20) at msort.c:45
No locals.
#19 __GI_qsort_r (b=0x11788280, n=430, s=8, cmp=0x7fca7e9c2e20 <PackageNameComparator(SoftwarePackage const**, SoftwarePackage const**)>, arg=<optimized out>)
    at msort.c:297
        size = <optimized out>
        tmp = 0xfa7c210 "\240;\254\016"
        p = {
          s = 8,
          var = 1,
          cmp = 0x7fca7e9c2e20 <PackageNameComparator(SoftwarePackage const**, SoftwarePackage const**)>,
          arg = 0x0,
          t = 0xfa7c210 "\240;\254\016"
        }
#20 0x00007fca7e9d44fa in sort (cb=<optimized out>, this=<optimized out>) at ../../../include/nms_util.h:726
No locals.
#21 Node::updateSoftwarePackages (this=0x7443860, poller=0x1ae, requestId=0) at node.cpp:2214
        table = 0x10830150
        packages = 0x11bea250
#22 0x00007fca7e9d87d5 in Node::configurationPoll (this=0x7443860, pSession=0x0, dwRqId=0, poller=0x7fca4d0b4bd0, maskBits=0) at node.cpp:2428
        type = NODE_TYPE_UNKNOWN
        szBuffer = L'\000' <repeats 3678 times>...
        hasChanges = true
#23 0x00007fca7e9d8ec3 in Node::configurationPoll (this=0x7443860, poller=0x7fca4d0b4bd0) at node.cpp:2275
No locals.
#24 0x00007fca7e970781 in __ThreadPoolExecute_Wrapper<Node, PollerInfo*> (arg=0x7fca4c4423b0) at ../../../include/nms_threads.h:970
        wd = 0x7fca4c4423b0
#25 0x00007fca7d9ed3b6 in WorkerThread (arg=0x7fca4c2cd240) at tp.cpp:127
        rq = 0x7fca4c2fbf90
        p = 0x7fca4c0fd2b0
        q = 0x7fca4c0fd380
#26 0x00007fca7c642064 in start_thread (arg=0x7fc9c31b8700) at pthread_create.c:309
        __res = <optimized out>
        pd = 0x7fc9c31b8700
        now = <optimized out>
        unwind_buf = {
          cancel_jmp_buf = {{
              jmp_buf = {140504538515200, 8965125287727607267, 0, 263873856, 17, 140504538515200, -8937107837346107933, -8935566916459024925},
              mask_was_saved = 0
            }},
          priv = {
            pad = {0x0, 0x0, 0x0, 0x0},
            data = {
              prev = 0x0,
              cleanup = 0x0,
              canceltype = 0
            }
          }
        }
        not_first_call = <optimized out>
        pagesize_m1 = <optimized out>
        sp = <optimized out>
        freesize = <optimized out>
        __PRETTY_FUNCTION__ = "start_thread"
#27 0x00007fca7b4f862d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111
No locals.
(gdb)

It seems that the problem is in the "software inventory" feature. The error occurred during a poll from an agent of version 2.0.7. Is it possible to temporarily disable this feature?

Victor Kirhenshtein

Hi,

you can only disable configuration polls for that node completely. Could you please provide output of command

nxget -T node_ip_here System.InstalledProducts

?

Best regards,
Victor

cyril

Quote from: Victor Kirhenshtein on September 19, 2017, 10:10:39 PM
Could you please provide output of command
nxget -T node_ip_here System.InstalledProducts
?

It prints installed packages like this (the full output exceeds the max message length)

| NAME                             | VERSION                              | VENDOR | DATE | URL                                                                       | DESCRIPTION                                                              |
| acpi                             | 1.6-1                                |        |      | http://sourceforge.net/projects/acpiclient                                | displays information on ACPI devices                                     |
| acpi-support-base                | 0.140-5+deb7u3                       |        |      |                                                                           | scripts for handling base ACPI events such as the power button           |
| acpid                            | 1:2.0.16-1+deb7u1                    |        |      | http://www.tedfelix.com/linux/acpid-netlink.html                          | Advanced Configuration and Power Interface event daemon                  |
| adduser                          | 3.113+nmu3                           |        |      | http://alioth.debian.org/projects/adduser/                                | add and remove users and groups                                          |
| apache2-mpm-prefork              | 2.2.22-13+deb7u6                     |        |      | http://httpd.apache.org/                                                  | Apache HTTP Server - traditional non-threaded model                      |
| apache2-utils                    | 2.2.22-13+deb7u6                     |        |      | http://httpd.apache.org/                                                  | utility programs for webservers                                          |
| apache2.2-bin                    | 2.2.22-13+deb7u6                     |        |      | http://httpd.apache.org/                                                  | Apache HTTP Server common binary files                                   |
| apache2.2-common                 | 2.2.22-13+deb7u6                     |        |      | http://httpd.apache.org/                                                  | Apache HTTP Server common files                                          |
| apt                              | 0.9.7.9+deb7u7                       |        |      |                                                                           | commandline package manager                                              |
| apt-utils                        | 0.9.7.9+deb7u7                       |        |      |                                                                           | package managment related utility programs                               |
| aptitude                         | 0.6.8.2-1                            |        |      | http://aptitude.alioth.debian.org/                                        | terminal-based package manager                                           |
| aptitude-common                  | 0.6.8.2-1                            |        |      | http://aptitude.alioth.debian.org/                                        | architecture indepedent files for the aptitude package manager           |
| asterisk                         | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | Open Source Private Branch Exchange (PBX)                                |
| asterisk-config                  | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | Configuration files for Asterisk                                         |
| asterisk-core-sounds-en          | 1.4.22-1                             |        |      | http://www.asterisk.org/                                                  | asterisk PBX sound files - US English                                    |
| asterisk-core-sounds-en-gsm      | 1.4.22-1                             |        |      | http://www.asterisk.org/                                                  | asterisk PBX sound files - en-us/gsm                                     |
| asterisk-modules                 | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | loadable modules for the Asterisk PBX                                    |
| asterisk-moh-opsound-gsm         | 2.03-1                               |        |      | http://www.asterisk.org/                                                  | asterisk extra sound files - English/gsm                                 |
| asterisk-mp3                     | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | MP3 playback support for the Asterisk PBX                                |
| asterisk-mysql                   | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | MySQL database protocol support for the Asterisk PBX                     |
| asterisk-voicemail               | 1:11.13.1~dfsg-2~bpo70+1             |        |      | http://www.asterisk.org/                                                  | simple voicemail support for the Asterisk PBX                            |
| autopoint                        | 0.18.1.1-9                           |        |      | http://www.gnu.org/software/gettext/                                      | The autopoint program from GNU gettext                                   |
| bareos-common                    | 15.2.0.git.1438969517.7e23e6f-1142.1 |        |      | http://www.bareos.org/                                                    | Backup Archiving Recovery Open Sourced - common files                    |
| bareos-filedaemon                | 15.2.0.git.1438969517.7e23e6f-1142.1 |        |      | http://www.bareos.org/                                                    | Backup Archiving Recovery Open Sourced - file daemon                     |
| base-files                       | 7.1wheezy9                           |        |      |                                                                           | Debian base system miscellaneous files                                   |
| base-passwd                      | 3.5.26                               |        |      |                                                                           | Debian base system master password and group files                       |
| bash                             | 4.2+dfsg-0.1+deb7u3                  |        |      | http://tiswww.case.edu/php/chet/bash/bashtop.html                         | GNU Bourne Again SHell                                                   |
| binutils                         | 2.22-8+deb7u2                        |        |      |                                                                           | GNU assembler, linker and binary utilities                               |
| bsdmainutils                     | 9.0.3                                |        |      |                                                                           | collection of more utilities from FreeBSD                                |
| bsdutils                         | 1:2.20.1-5.3                         |        |      | http://userweb.kernel.org/~kzak/util-linux/                               | Basic utilities from 4.4BSD-Lite                                         |
| build-essential                  | 11.5                                 |        |      |                                                                           | Informational list of build-essential packages                           |
| busybox                          | 1:1.20.0-7                           |        |      | http://www.busybox.net                                                    | Tiny utilities for small and embedded systems                            |
| bzip2                            | 1.0.6-4                              |        |      | http://www.bzip.org/                                                      | high-quality block-sorting file compressor - utilities                   |
| ca-certificates                  | 20130119+deb7u1                      |        |      |                                                                           | Common CA certificates                                                   |
| console-setup                    | 1.88                                 |        |      |                                                                           | console font and keymap setup program                                    |
| console-setup-linux              | 1.88                                 |        |      |                                                                           | Linux specific part of console-setup                                     |
| coreutils                        | 8.13-3.5                             |        |      | http://gnu.org/software/coreutils                                         | GNU core utilities                                                       |


We have had another segfault during a poll from another node with agent version 2.0.7. Nxget worked correctly with it too. Here is backtrace:

#0  wcscmp () at ../sysdeps/x86_64/wcscmp.S:424
#1  0x00007f12bbd3edb1 in msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=2) at msort.c:83
#2  0x00007f12bbd3eb02 in msort_with_tmp (n=2, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#3  msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=4) at msort.c:53
#4  0x00007f12bbd3eb02 in msort_with_tmp (n=4, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#5  msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=8) at msort.c:53
#6  0x00007f12bbd3eb02 in msort_with_tmp (n=8, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#7  msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=17) at msort.c:53
#8  0x00007f12bbd3eb02 in msort_with_tmp (n=17, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#9  msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=35) at msort.c:53
#10 0x00007f12bbd3eb02 in msort_with_tmp (n=35, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#11 msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=70) at msort.c:53
#12 0x00007f12bbd3eb02 in msort_with_tmp (n=70, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#13 msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=141) at msort.c:53
#14 0x00007f12bbd3eb02 in msort_with_tmp (n=141, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#15 msort_with_tmp (p=0x7f1220e09d20, b=0x7f11f63d24e0, n=282) at msort.c:53
#16 0x00007f12bbd3f05c in msort_with_tmp (n=282, b=0x7f11f63d24e0, p=0x7f1220e09d20) at msort.c:45
#17 __GI_qsort_r (b=0x7f11f63d24e0, n=282, s=8, cmp=0x7f12bf2bae20 <PackageNameComparator(SoftwarePackage const**, SoftwarePackage const**)>, arg=<optimized out>) at msort.c:297
#18 0x00007f12bf2cc4fa in sort (cb=<optimized out>, this=<optimized out>) at ../../../include/nms_util.h:726
#19 Node::updateSoftwarePackages (this=0x7ffc4e0, poller=0x11a, requestId=0) at node.cpp:2214
#20 0x00007f12bf2d07d5 in Node::configurationPoll (this=0x7ffc4e0, pSession=0x0, dwRqId=0, poller=0x7f127c389dc0, maskBits=0) at node.cpp:2428
#21 0x00007f12bf2d0ec3 in Node::configurationPoll (this=0x7ffc4e0, poller=0x7f127c389dc0) at node.cpp:2275
#22 0x00007f12bf268781 in __ThreadPoolExecute_Wrapper<Node, PollerInfo*> (arg=0x7f127c38a040) at ../../../include/nms_threads.h:970
#23 0x00007f12be2e53b6 in WorkerThread (arg=0x7f127c1aba10) at tp.cpp:127
#24 0x00007f12bcf3a064 in start_thread (arg=0x7f1220e0e700) at pthread_create.c:309
#25 0x00007f12bbdf062d in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:111

Victor Kirhenshtein

Hi,

It looks like older agents sometimes send a corrupted message, which causes the server to crash. We fixed the server side in 2.1.2 (it now ignores malformed messages). Upgrading the agent to 2.1.1 should also help.

Best regards,
Victor