Uploaded image for project: 'Couchbase Server'
  1. Couchbase Server
  2. MB-11020

addNode failed after the nodes had been idle for some time

    XMLWordPrintable

Details

    • Bug
    • Resolution: Fixed
    • Critical
    • 3.0
    • 3.0
    • ns_server
    • Security Level: Public
    • None
    • Untriaged
    • Unknown

    Description

      I installed Couchbase (cb) on some VMs a few hours earlier and then tried to add 172.23.105.159 + 172.23.105.160.
      On the first attempt I got:

      [error_logger:info,2014-05-02T6:13:24.029,ns_1@127.0.0.1:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================PROGRESS REPORT=========================
      supervisor:

      {local,net_sup}
      started: [{pid,<0.16105.1>},
      {name,erl_epmd},
      {mfargs,{erl_epmd,start_link,[]}},
      {restart_type,permanent},
      {shutdown,2000},
      {child_type,worker}]

      [error_logger:info,2014-05-02T6:13:24.029,ns_1@127.0.0.1:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================PROGRESS REPORT=========================
      supervisor: {local,net_sup}

      started: [

      {pid,<0.16106.1>}

      ,

      {name,auth}

      ,
      {mfargs,{auth,start_link,[]}},

      {restart_type,permanent},
      {shutdown,2000},
      {child_type,worker}]

      [ns_server:debug,2014-05-02T6:13:24.030,nonode@nohost:<0.724.0>:ns_pubsub:do_subscribe_link:136]Parent process of subscription {ns_config_events,<0.722.0>} exited with reason noconnection
      [user:warn,2014-05-02T6:13:24.030,nonode@nohost:ns_node_disco<0.609.0>:ns_node_disco:handle_info:175]Node nonode@nohost saw that node 'ns_1@127.0.0.1' went down. Details: [{nodedown_reason, net_kernel_terminated}]
      [error_logger:info,2014-05-02T6:13:24.031,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================PROGRESS REPORT=========================
      supervisor: {local,ns_server_sup}
      started: [{pid,<0.16108.1>},
      {name,ns_ports_setup},
      {mfa,{ns_ports_setup,start,[]}},
      {restart_type,{permanent,4}},
      {shutdown,brutal_kill},
      {child_type,worker}]

      [error_logger:info,2014-05-02T6:13:24.036,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]Protocol: "inet_tcp": the name ns_1@172.23.105.159 seems to be in use by another Erlang node
      [ns_server:info,2014-05-02T6:13:24.037,nonode@nohost:ns_node_disco_events<0.608.0>:ns_node_disco_log:handle_event:46]ns_node_disco_log: nodes changed: []
      [error_logger:error,2014-05-02T6:13:24.037,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================CRASH REPORT=========================
      crasher:
      initial call: net_kernel:init/1
      pid: <0.16107.1>
      registered_name: []
      exception exit: {error,badarg}
      in function gen_server:init_it/6 (gen_server.erl, line 320)
      ancestors: [net_sup,kernel_sup,<0.10.0>]
      messages: []
      links: Port<0.19771>,<0.16104.1>
      dictionary: [{longnames,true}]
      trap_exit: true
      status: running
      heap_size: 610
      stack_size: 27
      reductions: 1020
      neighbours:

      [ns_server:info,2014-05-02T6:13:24.037,nonode@nohost:dist_manager<0.16110.1>:dist_manager:read_address_config_from_path:83]Reading ip config from "/opt/couchbase/var/lib/couchbase/ip_start"
      [error_logger:error,2014-05-02T6:13:24.037,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================SUPERVISOR REPORT=========================
      Supervisor: {local,net_sup}
      Context: start_error
      Reason: {'EXIT',nodistribution}
      Offender: [{pid,undefined},
      {name,net_kernel},
      {mfargs,{net_kernel,start_link,
      [['ns_1@172.23.105.159',longnames]]}},
      {restart_type,permanent}

      ,

      {shutdown,2000}

      ,

      {child_type,worker}

      ]

      [ns_server:error,2014-05-02T6:13:24.037,nonode@nohost:<0.704.0>:menelaus_web:loop:164]Server error during processing: ["web request failed",

      {path, "/pools/default/serverGroups/0/addNode"}

      ,

      {type,exit}

      ,
      {what,
      {{{function_clause,
      [{dist_manager,decode_status,
      [{error,
      {{shutdown,
      {failed_to_start_child,net_kernel,

      {'EXIT',nodistribution}}},
      {child,undefined,net_sup_dynamic,
      {erl_distribution,start_link, [['ns_1@172.23.105.159', longnames]]},
      permanent,1000,supervisor,
      [erl_distribution]}}}],
      [{file,"src/dist_manager.erl"},
      {line,184}]},
      {dist_manager,bringup,2,
      [{file,"src/dist_manager.erl"},
      {line,231}]},
      {dist_manager,do_adjust_address,3,
      [{file,"src/dist_manager.erl"},
      {line,250}]},
      {gen_server,handle_msg,5,
      [{file,"gen_server.erl"},{line,585}]},
      {proc_lib,init_p_do_apply,3,
      [{file,"proc_lib.erl"},{line,239}]}]},
      {gen_server,call,
      [dist_manager,
      {adjust_my_address,"172.23.105.159", false}]}},
      {gen_server,call,
      [ns_cluster,
      {add_node_to_group,"172.23.105.160",
      8091,
      {"Administrator","password"},
      <<"0">>},
      65000]}}},
      {trace,
      [{gen_server,call,3,
      [{file,"gen_server.erl"},{line,188}]
      },
      {ns_cluster,add_node_to_group,4,
      [{file,"src/ns_cluster.erl"},{line,84}]},
      {menelaus_web,do_handle_add_node,2,
      [{file,"src/menelaus_web.erl"},
      {line,2493}]},
      {request_throttler,do_request,3,
      [{file,"src/request_throttler.erl"},
      {line,59}]},
      {menelaus_web,loop,3,
      [{file,"src/menelaus_web.erl"},
      {line,143}]},
      {mochiweb_http,headers,5,
      [{file, "/home/buildbot/centos-5-x64-300-builder/build/build/couchdb/src/mochiweb/mochiweb_http.erl"},
      {line,94}]},
      {proc_lib,init_p_do_apply,3,
      [{file,"proc_lib.erl"},{line,239}]}]}]
      [error_logger:error,2014-05-02T6:13:24.038,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]** Generic server dist_manager terminating
      ** Last message in was {adjust_my_address,"172.23.105.159",false}
      ** When Server state == {state,true,false,"127.0.0.1"}
      ** Reason for termination ==
      ** {function_clause,
      [{dist_manager,decode_status,
      [{error,
      {{shutdown,
      {failed_to_start_child,net_kernel,
      {'EXIT',nodistribution}

      }},
      {child,undefined,net_sup_dynamic,

      {erl_distribution,start_link, [['ns_1@172.23.105.159',longnames]]}

      ,
      permanent,1000,supervisor,
      [erl_distribution]}}}],
      [

      {file,"src/dist_manager.erl"},{line,184}]},
      {dist_manager,bringup,2,[{file,"src/dist_manager.erl"}

      ,

      {line,231}]},
      {dist_manager,do_adjust_address,3,
      [{file,"src/dist_manager.erl"},{line,250}]},
      {gen_server,handle_msg,5,[{file,"gen_server.erl"},{line,585}]},
      {proc_lib,init_p_do_apply,3,[{file,"proc_lib.erl"},{line,239}]}]}

      [ns_server:info,2014-05-02T6:13:24.038,nonode@nohost:dist_manager<0.16110.1>:dist_manager:read_address_config_from_path:83]Reading ip config from "/opt/couchbase/var/lib/couchbase/ip"
      [ns_server:info,2014-05-02T6:13:24.038,nonode@nohost:dist_manager<0.16110.1>:dist_manager:init:159]ip config not found. Looks like we're brand new node
      [ns_server:info,2014-05-02T6:13:24.039,nonode@nohost:dist_manager<0.16110.1>:dist_manager:bringup:230]Attempting to bring up net_kernel with name 'ns_1@127.0.0.1'
      [error_logger:error,2014-05-02T6:13:24.039,nonode@nohost:error_logger<0.6.0>:ale_error_logger_handler:do_log:207]
      =========================CRASH REPORT=========================
      crasher:
      initial call: dist_manager:init/1
      pid: <0.276.0>
      registered_name: dist_manager
      exception exit: {function_clause,
      [{dist_manager,decode_status,
      [{error,
      {{shutdown,
      {failed_to_start_child,net_kernel,
      {'EXIT',nodistribution}}},
      {child,undefined,net_sup_dynamic,
      {erl_distribution,start_link, [['ns_1@172.23.105.159',longnames]]},
      permanent,1000,supervisor,
      [erl_distribution]}}}],
      [{file,"src/dist_manager.erl"},{line,184}]
      },
      {dist_manager,bringup,2,
      [{file,"src/dist_manager.erl"},{line,231}

      ]},
      {dist_manager,do_adjust_address,3,
      [

      {file,"src/dist_manager.erl"}

      ,

      {line,250}

      ]},
      {gen_server,handle_msg,5,
      [

      {file,"gen_server.erl"}

      ,

      {line,585}

      ]},
      {proc_lib,init_p_do_apply,3,
      [

      {file,"proc_lib.erl"}

      ,

      {line,239}

      ]}]}
      in function gen_server:terminate/6 (gen_server.erl, line 744)
      ancestors: [ns_server_cluster_sup,<0.58.0>]
      messages: []
      links: [<0.172.0>]
      dictionary: []
      trap_exit: false
      status: running
      heap_size: 2586
      stack_size: 27
      reductions: 8236
      neighbours:

      This looks very similar to MB-9595.

      Attachments

        Issue Links

          For Gerrit Dashboard: MB-11020
          # Subject Branch Project Status CR V

          Activity

            People

              iryna iryna
              andreibaranouski Andrei Baranouski
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

              Dates

                Created:
                Updated:
                Resolved:

                Gerrit Reviews

                  There are no open Gerrit changes

                  PagerDuty