root/lib/cluster/membership.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. crm_remote_peer_cache_size
  2. crm_remote_peer_get
  3. crm_remote_peer_cache_remove
  4. remote_state_from_cib
  5. remote_cache_refresh_helper
  6. mark_dirty
  7. is_dirty
  8. crm_remote_peer_cache_refresh
  9. crm_is_peer_active
  10. crm_reap_dead_member
  11. reap_crm_member
  12. crm_count_peer
  13. crm_active_peers
  14. destroy_crm_node
  15. crm_peer_init
  16. crm_peer_destroy
  17. crm_set_status_callback
  18. crm_set_autoreap
  19. crm_dump_peer_hash
  20. crm_hash_find_by_data
  21. crm_find_peer_full
  22. crm_get_peer_full
  23. crm_find_peer
  24. crm_remove_conflicting_peer
  25. crm_get_peer
  26. crm_update_peer_uname
  27. crm_update_peer_proc
  28. crm_update_peer_expected
  29. crm_update_peer_state_iter
  30. crm_update_peer_state
  31. crm_reap_unseen_nodes
  32. crm_terminate_member
  33. crm_terminate_member_no_mainloop
  34. crm_find_known_peer
  35. known_peer_cache_refresh_helper
  36. crm_known_peer_cache_refresh
  37. crm_peer_caches_refresh
  38. crm_find_known_peer_full

   1 /*
   2  * Copyright 2004-2020 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU Lesser General Public License
   7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #ifndef _GNU_SOURCE
  13 #  define _GNU_SOURCE
  14 #endif
  15 
  16 #include <sys/param.h>
  17 #include <sys/types.h>
  18 #include <stdio.h>
  19 #include <unistd.h>
  20 #include <string.h>
  21 #include <glib.h>
  22 #include <crm/common/ipc.h>
  23 #include <crm/cluster/internal.h>
  24 #include <crm/msg_xml.h>
  25 #include <crm/stonith-ng.h>
  26 
  27 /* The peer cache remembers cluster nodes that have been seen.
  28  * This is managed mostly automatically by libcluster, based on
  29  * cluster membership events.
  30  *
  31  * Because cluster nodes can have conflicting names or UUIDs,
  32  * the hash table key is a uniquely generated ID.
  33  */
  34 GHashTable *crm_peer_cache = NULL;
  35 
  36 /*
  37  * The remote peer cache tracks pacemaker_remote nodes. While the
  38  * value has the same type as the peer cache's, it is tracked separately for
  39  * three reasons: pacemaker_remote nodes can't have conflicting names or UUIDs,
  40  * so the name (which is also the UUID) is used as the hash table key; there
  41  * is no equivalent of membership events, so management is not automatic; and
  42  * most users of the peer cache need to exclude pacemaker_remote nodes.
  43  *
  44  * That said, using a single cache would be more logical and less error-prone,
  45  * so it would be a good idea to merge them one day.
  46  *
  47  * libcluster provides two avenues for populating the cache:
  48  * crm_remote_peer_get() and crm_remote_peer_cache_remove() directly manage it,
  49  * while crm_remote_peer_cache_refresh() populates it via the CIB.
  50  */
  51 GHashTable *crm_remote_peer_cache = NULL;
  52 
  53 GHashTable *crm_known_peer_cache = NULL;
  54 
  55 unsigned long long crm_peer_seq = 0;
  56 gboolean crm_have_quorum = FALSE;
  57 static gboolean crm_autoreap  = TRUE;
  58 
  59 int
  60 crm_remote_peer_cache_size(void)
     /* [previous][next][first][last][top][bottom][index][help] */
  61 {
  62     if (crm_remote_peer_cache == NULL) {
  63         return 0;
  64     }
  65     return g_hash_table_size(crm_remote_peer_cache);
  66 }
  67 
  68 /*!
  69  * \brief Get a remote node peer cache entry, creating it if necessary
  70  *
  71  * \param[in] node_name  Name of remote node
  72  *
  73  * \return Cache entry for node on success, NULL (and set errno) otherwise
  74  *
  75  * \note When creating a new entry, this will leave the node state undetermined,
  76  *       so the caller should also call crm_update_peer_state() if the state is
  77  *       known.
  78  */
  79 crm_node_t *
  80 crm_remote_peer_get(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
  81 {
  82     crm_node_t *node;
  83 
  84     if (node_name == NULL) {
  85         errno = -EINVAL;
  86         return NULL;
  87     }
  88 
  89     /* Return existing cache entry if one exists */
  90     node = g_hash_table_lookup(crm_remote_peer_cache, node_name);
  91     if (node) {
  92         return node;
  93     }
  94 
  95     /* Allocate a new entry */
  96     node = calloc(1, sizeof(crm_node_t));
  97     if (node == NULL) {
  98         return NULL;
  99     }
 100 
 101     /* Populate the essential information */
 102     pcmk__set_peer_flags(node, crm_remote_node);
 103     node->uuid = strdup(node_name);
 104     if (node->uuid == NULL) {
 105         free(node);
 106         errno = -ENOMEM;
 107         return NULL;
 108     }
 109 
 110     /* Add the new entry to the cache */
 111     g_hash_table_replace(crm_remote_peer_cache, node->uuid, node);
 112     crm_trace("added %s to remote cache", node_name);
 113 
 114     /* Update the entry's uname, ensuring peer status callbacks are called */
 115     crm_update_peer_uname(node, node_name);
 116     return node;
 117 }
 118 
 119 void
 120 crm_remote_peer_cache_remove(const char *node_name)
     /* [previous][next][first][last][top][bottom][index][help] */
 121 {
 122     if (g_hash_table_remove(crm_remote_peer_cache, node_name)) {
 123         crm_trace("removed %s from remote peer cache", node_name);
 124     }
 125 }
 126 
 127 /*!
 128  * \internal
 129  * \brief Return node status based on a CIB status entry
 130  *
 131  * \param[in] node_state  XML of node state
 132  *
 133  * \return CRM_NODE_LOST if XML_NODE_IN_CLUSTER is false in node_state,
 134  *         CRM_NODE_MEMBER otherwise
 135  * \note Unlike most boolean XML attributes, this one defaults to true, for
 136  *       backward compatibility with older controllers that don't set it.
 137  */
 138 static const char *
 139 remote_state_from_cib(xmlNode *node_state)
     /* [previous][next][first][last][top][bottom][index][help] */
 140 {
 141     const char *status;
 142 
 143     status = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
 144     if (status && !crm_is_true(status)) {
 145         status = CRM_NODE_LOST;
 146     } else {
 147         status = CRM_NODE_MEMBER;
 148     }
 149     return status;
 150 }
 151 
 152 /* user data for looping through remote node xpath searches */
 153 struct refresh_data {
 154     const char *field;  /* XML attribute to check for node name */
 155     gboolean has_state; /* whether to update node state based on XML */
 156 };
 157 
 158 /*!
 159  * \internal
 160  * \brief Process one pacemaker_remote node xpath search result
 161  *
 162  * \param[in] result     XML search result
 163  * \param[in] user_data  what to look for in the XML
 164  */
 165 static void
 166 remote_cache_refresh_helper(xmlNode *result, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 167 {
 168     struct refresh_data *data = user_data;
 169     const char *remote = crm_element_value(result, data->field);
 170     const char *state = NULL;
 171     crm_node_t *node;
 172 
 173     CRM_CHECK(remote != NULL, return);
 174 
 175     /* Determine node's state, if the result has it */
 176     if (data->has_state) {
 177         state = remote_state_from_cib(result);
 178     }
 179 
 180     /* Check whether cache already has entry for node */
 181     node = g_hash_table_lookup(crm_remote_peer_cache, remote);
 182 
 183     if (node == NULL) {
 184         /* Node is not in cache, so add a new entry for it */
 185         node = crm_remote_peer_get(remote);
 186         CRM_ASSERT(node);
 187         if (state) {
 188             crm_update_peer_state(__func__, node, state, 0);
 189         }
 190 
 191     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
 192         /* Node is in cache and hasn't been updated already, so mark it clean */
 193         pcmk__clear_peer_flags(node, crm_node_dirty);
 194         if (state) {
 195             crm_update_peer_state(__func__, node, state, 0);
 196         }
 197     }
 198 }
 199 
 200 static void
 201 mark_dirty(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 202 {
 203     pcmk__set_peer_flags((crm_node_t *) value, crm_node_dirty);
 204 }
 205 
 206 static gboolean
 207 is_dirty(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 208 {
 209     return pcmk_is_set(((crm_node_t*)value)->flags, crm_node_dirty);
 210 }
 211 
 212 /* search string to find CIB resources entries for guest nodes */
 213 #define XPATH_GUEST_NODE_CONFIG \
 214     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
 215     "//" XML_TAG_META_SETS "//" XML_CIB_TAG_NVPAIR \
 216     "[@name='" XML_RSC_ATTR_REMOTE_NODE "']"
 217 
 218 /* search string to find CIB resources entries for remote nodes */
 219 #define XPATH_REMOTE_NODE_CONFIG \
 220     "//" XML_TAG_CIB "//" XML_CIB_TAG_CONFIGURATION "//" XML_CIB_TAG_RESOURCE \
 221     "[@type='remote'][@provider='pacemaker']"
 222 
 223 /* search string to find CIB node status entries for pacemaker_remote nodes */
 224 #define XPATH_REMOTE_NODE_STATUS \
 225     "//" XML_TAG_CIB "//" XML_CIB_TAG_STATUS "//" XML_CIB_TAG_STATE \
 226     "[@" XML_NODE_IS_REMOTE "='true']"
 227 
 228 /*!
 229  * \brief Repopulate the remote peer cache based on CIB XML
 230  *
 231  * \param[in] xmlNode  CIB XML to parse
 232  */
 233 void
 234 crm_remote_peer_cache_refresh(xmlNode *cib)
     /* [previous][next][first][last][top][bottom][index][help] */
 235 {
 236     struct refresh_data data;
 237 
 238     crm_peer_init();
 239 
 240     /* First, we mark all existing cache entries as dirty,
 241      * so that later we can remove any that weren't in the CIB.
 242      * We don't empty the cache, because we need to detect changes in state.
 243      */
 244     g_hash_table_foreach(crm_remote_peer_cache, mark_dirty, NULL);
 245 
 246     /* Look for guest nodes and remote nodes in the status section */
 247     data.field = "id";
 248     data.has_state = TRUE;
 249     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_STATUS,
 250                              remote_cache_refresh_helper, &data);
 251 
 252     /* Look for guest nodes and remote nodes in the configuration section,
 253      * because they may have just been added and not have a status entry yet.
 254      * In that case, the cached node state will be left NULL, so that the
 255      * peer status callback isn't called until we're sure the node started
 256      * successfully.
 257      */
 258     data.field = "value";
 259     data.has_state = FALSE;
 260     crm_foreach_xpath_result(cib, XPATH_GUEST_NODE_CONFIG,
 261                              remote_cache_refresh_helper, &data);
 262     data.field = "id";
 263     data.has_state = FALSE;
 264     crm_foreach_xpath_result(cib, XPATH_REMOTE_NODE_CONFIG,
 265                              remote_cache_refresh_helper, &data);
 266 
 267     /* Remove all old cache entries that weren't seen in the CIB */
 268     g_hash_table_foreach_remove(crm_remote_peer_cache, is_dirty, NULL);
 269 }
 270 
 271 gboolean
 272 crm_is_peer_active(const crm_node_t * node)
     /* [previous][next][first][last][top][bottom][index][help] */
 273 {
 274     if(node == NULL) {
 275         return FALSE;
 276     }
 277 
 278     if (pcmk_is_set(node->flags, crm_remote_node)) {
 279         /* remote nodes are never considered active members. This
 280          * guarantees they will never be considered for DC membership.*/
 281         return FALSE;
 282     }
 283 #if SUPPORT_COROSYNC
 284     if (is_corosync_cluster()) {
 285         return crm_is_corosync_peer_active(node);
 286     }
 287 #endif
 288     crm_err("Unhandled cluster type: %s", name_for_cluster_type(get_cluster_type()));
 289     return FALSE;
 290 }
 291 
 292 static gboolean
 293 crm_reap_dead_member(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 294 {
 295     crm_node_t *node = value;
 296     crm_node_t *search = user_data;
 297 
 298     if (search == NULL) {
 299         return FALSE;
 300 
 301     } else if (search->id && node->id != search->id) {
 302         return FALSE;
 303 
 304     } else if (search->id == 0 && !pcmk__str_eq(node->uname, search->uname, pcmk__str_casei)) {
 305         return FALSE;
 306 
 307     } else if (crm_is_peer_active(value) == FALSE) {
 308         crm_info("Removing node with name %s and id %u from membership cache",
 309                  (node->uname? node->uname : "unknown"), node->id);
 310         return TRUE;
 311     }
 312     return FALSE;
 313 }
 314 
 315 /*!
 316  * \brief Remove all peer cache entries matching a node ID and/or uname
 317  *
 318  * \param[in] id    ID of node to remove (or 0 to ignore)
 319  * \param[in] name  Uname of node to remove (or NULL to ignore)
 320  *
 321  * \return Number of cache entries removed
 322  */
 323 guint
 324 reap_crm_member(uint32_t id, const char *name)
     /* [previous][next][first][last][top][bottom][index][help] */
 325 {
 326     int matches = 0;
 327     crm_node_t search;
 328 
 329     if (crm_peer_cache == NULL) {
 330         crm_trace("Membership cache not initialized, ignoring purge request");
 331         return 0;
 332     }
 333 
 334     search.id = id;
 335     search.uname = name ? strdup(name) : NULL;
 336     matches = g_hash_table_foreach_remove(crm_peer_cache, crm_reap_dead_member, &search);
 337     if(matches) {
 338         crm_notice("Purged %d peer%s with id=%u%s%s from the membership cache",
 339                    matches, pcmk__plural_s(matches), search.id,
 340                    (search.uname? " and/or uname=" : ""),
 341                    (search.uname? search.uname : ""));
 342 
 343     } else {
 344         crm_info("No peers with id=%u%s%s to purge from the membership cache",
 345                  search.id, (search.uname? " and/or uname=" : ""),
 346                  (search.uname? search.uname : ""));
 347     }
 348 
 349     free(search.uname);
 350     return matches;
 351 }
 352 
 353 static void
 354 crm_count_peer(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 355 {
 356     guint *count = user_data;
 357     crm_node_t *node = value;
 358 
 359     if (crm_is_peer_active(node)) {
 360         *count = *count + 1;
 361     }
 362 }
 363 
 364 guint
 365 crm_active_peers(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 366 {
 367     guint count = 0;
 368 
 369     if (crm_peer_cache) {
 370         g_hash_table_foreach(crm_peer_cache, crm_count_peer, &count);
 371     }
 372     return count;
 373 }
 374 
 375 static void
 376 destroy_crm_node(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 377 {
 378     crm_node_t *node = data;
 379 
 380     crm_trace("Destroying entry for node %u: %s", node->id, node->uname);
 381 
 382     free(node->uname);
 383     free(node->state);
 384     free(node->uuid);
 385     free(node->expected);
 386     free(node);
 387 }
 388 
 389 void
 390 crm_peer_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 391 {
 392     if (crm_peer_cache == NULL) {
 393         crm_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
 394     }
 395 
 396     if (crm_remote_peer_cache == NULL) {
 397         crm_remote_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, NULL, destroy_crm_node);
 398     }
 399 
 400     if (crm_known_peer_cache == NULL) {
 401         crm_known_peer_cache = g_hash_table_new_full(crm_strcase_hash, crm_strcase_equal, free, destroy_crm_node);
 402     }
 403 }
 404 
 405 void
 406 crm_peer_destroy(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 407 {
 408     if (crm_peer_cache != NULL) {
 409         crm_trace("Destroying peer cache with %d members", g_hash_table_size(crm_peer_cache));
 410         g_hash_table_destroy(crm_peer_cache);
 411         crm_peer_cache = NULL;
 412     }
 413 
 414     if (crm_remote_peer_cache != NULL) {
 415         crm_trace("Destroying remote peer cache with %d members", g_hash_table_size(crm_remote_peer_cache));
 416         g_hash_table_destroy(crm_remote_peer_cache);
 417         crm_remote_peer_cache = NULL;
 418     }
 419 
 420     if (crm_known_peer_cache != NULL) {
 421         crm_trace("Destroying known peer cache with %d members", g_hash_table_size(crm_known_peer_cache));
 422         g_hash_table_destroy(crm_known_peer_cache);
 423         crm_known_peer_cache = NULL;
 424     }
 425 
 426 }
 427 
 428 void (*crm_status_callback) (enum crm_status_type, crm_node_t *, const void *) = NULL;
 429 
 430 /*!
 431  * \brief Set a client function that will be called after peer status changes
 432  *
 433  * \param[in] dispatch  Pointer to function to use as callback
 434  *
 435  * \note Previously, client callbacks were responsible for peer cache
 436  *       management. This is no longer the case, and client callbacks should do
 437  *       only client-specific handling. Callbacks MUST NOT add or remove entries
 438  *       in the peer caches.
 439  */
 440 void
 441 crm_set_status_callback(void (*dispatch) (enum crm_status_type, crm_node_t *, const void *))
     /* [previous][next][first][last][top][bottom][index][help] */
 442 {
 443     crm_status_callback = dispatch;
 444 }
 445 
 446 /*!
 447  * \brief Tell the library whether to automatically reap lost nodes
 448  *
 449  * If TRUE (the default), calling crm_update_peer_proc() will also update the
 450  * peer state to CRM_NODE_MEMBER or CRM_NODE_LOST, and crm_update_peer_state()
 451  * will reap peers whose state changes to anything other than CRM_NODE_MEMBER.
 452  * Callers should leave this enabled unless they plan to manage the cache
 453  * separately on their own.
 454  *
 455  * \param[in] autoreap  TRUE to enable automatic reaping, FALSE to disable
 456  */
 457 void
 458 crm_set_autoreap(gboolean autoreap)
     /* [previous][next][first][last][top][bottom][index][help] */
 459 {
 460     crm_autoreap = autoreap;
 461 }
 462 
 463 static void crm_dump_peer_hash(int level, const char *caller)
     /* [previous][next][first][last][top][bottom][index][help] */
 464 {
 465     GHashTableIter iter;
 466     const char *id = NULL;
 467     crm_node_t *node = NULL;
 468 
 469     g_hash_table_iter_init(&iter, crm_peer_cache);
 470     while (g_hash_table_iter_next(&iter, (gpointer *) &id, (gpointer *) &node)) {
 471         do_crm_log(level, "%s: Node %u/%s = %p - %s", caller, node->id, node->uname, node, id);
 472     }
 473 }
 474 
 475 static gboolean crm_hash_find_by_data(gpointer key, gpointer value, gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 476 {
 477     if(value == user_data) {
 478         return TRUE;
 479     }
 480     return FALSE;
 481 }
 482 
 483 crm_node_t *
 484 crm_find_peer_full(unsigned int id, const char *uname, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 485 {
 486     crm_node_t *node = NULL;
 487 
 488     CRM_ASSERT(id > 0 || uname != NULL);
 489 
 490     crm_peer_init();
 491 
 492     if ((uname != NULL) && (flags & CRM_GET_PEER_REMOTE)) {
 493         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 494     }
 495 
 496     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
 497         node = crm_find_peer(id, uname);
 498     }
 499     return node;
 500 }
 501 
 502 crm_node_t *
 503 crm_get_peer_full(unsigned int id, const char *uname, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
 504 {
 505     crm_node_t *node = NULL;
 506 
 507     CRM_ASSERT(id > 0 || uname != NULL);
 508 
 509     crm_peer_init();
 510 
 511     if (flags & CRM_GET_PEER_REMOTE) {
 512         node = g_hash_table_lookup(crm_remote_peer_cache, uname);
 513     }
 514 
 515     if (node == NULL && (flags & CRM_GET_PEER_CLUSTER)) {
 516         node = crm_get_peer(id, uname);
 517     }
 518     return node;
 519 }
 520 
 521 crm_node_t *
 522 crm_find_peer(unsigned int id, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 523 {
 524     GHashTableIter iter;
 525     crm_node_t *node = NULL;
 526     crm_node_t *by_id = NULL;
 527     crm_node_t *by_name = NULL;
 528 
 529     CRM_ASSERT(id > 0 || uname != NULL);
 530 
 531     crm_peer_init();
 532 
 533     if (uname != NULL) {
 534         g_hash_table_iter_init(&iter, crm_peer_cache);
 535         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 536             if(node->uname && strcasecmp(node->uname, uname) == 0) {
 537                 crm_trace("Name match: %s = %p", node->uname, node);
 538                 by_name = node;
 539                 break;
 540             }
 541         }
 542     }
 543 
 544     if (id > 0) {
 545         g_hash_table_iter_init(&iter, crm_peer_cache);
 546         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 547             if(node->id == id) {
 548                 crm_trace("ID match: %u = %p", node->id, node);
 549                 by_id = node;
 550                 break;
 551             }
 552         }
 553     }
 554 
 555     node = by_id; /* Good default */
 556     if(by_id == by_name) {
 557         /* Nothing to do if they match (both NULL counts) */
 558         crm_trace("Consistent: %p for %u/%s", by_id, id, uname);
 559 
 560     } else if(by_id == NULL && by_name) {
 561         crm_trace("Only one: %p for %u/%s", by_name, id, uname);
 562 
 563         if(id && by_name->id) {
 564             crm_dump_peer_hash(LOG_WARNING, __func__);
 565             crm_crit("Node %u and %u share the same name '%s'",
 566                      id, by_name->id, uname);
 567             node = NULL; /* Create a new one */
 568 
 569         } else {
 570             node = by_name;
 571         }
 572 
 573     } else if(by_name == NULL && by_id) {
 574         crm_trace("Only one: %p for %u/%s", by_id, id, uname);
 575 
 576         if(uname && by_id->uname) {
 577             crm_dump_peer_hash(LOG_WARNING, __func__);
 578             crm_crit("Node '%s' and '%s' share the same cluster nodeid %u: assuming '%s' is correct",
 579                      uname, by_id->uname, id, uname);
 580         }
 581 
 582     } else if(uname && by_id->uname) {
 583         if(pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
 584             crm_notice("Node '%s' has changed its ID from %u to %u", by_id->uname, by_name->id, by_id->id);
 585             g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
 586 
 587         } else {
 588             crm_warn("Node '%s' and '%s' share the same cluster nodeid: %u %s", by_id->uname, by_name->uname, id, uname);
 589             crm_dump_peer_hash(LOG_INFO, __func__);
 590             crm_abort(__FILE__, __func__, __LINE__, "member weirdness", TRUE,
 591                       TRUE);
 592         }
 593 
 594     } else if(id && by_name->id) {
 595         crm_warn("Node %u and %u share the same name: '%s'", by_id->id, by_name->id, uname);
 596 
 597     } else {
 598         /* Simple merge */
 599 
 600         /* Only corosync-based clusters use node IDs. The functions that call
 601          * crm_update_peer_state() and crm_update_peer_proc() only know nodeid,
 602          * so 'by_id' is authoritative when merging.
 603          */
 604         crm_dump_peer_hash(LOG_DEBUG, __func__);
 605 
 606         crm_info("Merging %p into %p", by_name, by_id);
 607         g_hash_table_foreach_remove(crm_peer_cache, crm_hash_find_by_data, by_name);
 608     }
 609 
 610     return node;
 611 }
 612 
 613 #if SUPPORT_COROSYNC
 614 static guint
 615 crm_remove_conflicting_peer(crm_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 616 {
 617     int matches = 0;
 618     GHashTableIter iter;
 619     crm_node_t *existing_node = NULL;
 620 
 621     if (node->id == 0 || node->uname == NULL) {
 622         return 0;
 623     }
 624 
 625     if (corosync_cmap_has_config("nodelist") != 0) {
 626         return 0;
 627     }
 628 
 629     g_hash_table_iter_init(&iter, crm_peer_cache);
 630     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &existing_node)) {
 631         if (existing_node->id > 0
 632             && existing_node->id != node->id
 633             && existing_node->uname != NULL
 634             && strcasecmp(existing_node->uname, node->uname) == 0) {
 635 
 636             if (crm_is_peer_active(existing_node)) {
 637                 continue;
 638             }
 639 
 640             crm_warn("Removing cached offline node %u/%s which has conflicting uname with %u",
 641                      existing_node->id, existing_node->uname, node->id);
 642 
 643             g_hash_table_iter_remove(&iter);
 644             matches++;
 645         }
 646     }
 647 
 648     return matches;
 649 }
 650 #endif
 651 
 652 /* coverity[-alloc] Memory is referenced in one or both hashtables */
 653 crm_node_t *
 654 crm_get_peer(unsigned int id, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 655 {
 656     crm_node_t *node = NULL;
 657     char *uname_lookup = NULL;
 658 
 659     CRM_ASSERT(id > 0 || uname != NULL);
 660 
 661     crm_peer_init();
 662 
 663     node = crm_find_peer(id, uname);
 664 
 665     /* if uname wasn't provided, and find_peer did not turn up a uname based on id.
 666      * we need to do a lookup of the node name using the id in the cluster membership. */
 667     if ((node == NULL || node->uname == NULL) && (uname == NULL)) { 
 668         uname_lookup = get_node_name(id);
 669     }
 670 
 671     if (uname_lookup) {
 672         uname = uname_lookup;
 673         crm_trace("Inferred a name of '%s' for node %u", uname, id);
 674 
 675         /* try to turn up the node one more time now that we know the uname. */
 676         if (node == NULL) {
 677             node = crm_find_peer(id, uname);
 678         }
 679     }
 680 
 681 
 682     if (node == NULL) {
 683         char *uniqueid = crm_generate_uuid();
 684 
 685         node = calloc(1, sizeof(crm_node_t));
 686         CRM_ASSERT(node);
 687 
 688         crm_info("Created entry %s/%p for node %s/%u (%d total)",
 689                  uniqueid, node, uname, id, 1 + g_hash_table_size(crm_peer_cache));
 690         g_hash_table_replace(crm_peer_cache, uniqueid, node);
 691     }
 692 
 693     if(id > 0 && uname && (node->id == 0 || node->uname == NULL)) {
 694         crm_info("Node %u is now known as %s", id, uname);
 695     }
 696 
 697     if(id > 0 && node->id == 0) {
 698         node->id = id;
 699     }
 700 
 701     if (uname && (node->uname == NULL)) {
 702         crm_update_peer_uname(node, uname);
 703     }
 704 
 705     if(node->uuid == NULL) {
 706         const char *uuid = crm_peer_uuid(node);
 707 
 708         if (uuid) {
 709             crm_info("Node %u has uuid %s", id, uuid);
 710 
 711         } else {
 712             crm_info("Cannot obtain a UUID for node %u/%s", id, node->uname);
 713         }
 714     }
 715 
 716     free(uname_lookup);
 717 
 718     return node;
 719 }
 720 
 721 /*!
 722  * \internal
 723  * \brief Update a node's uname
 724  *
 725  * \param[in] node        Node object to update
 726  * \param[in] uname       New name to set
 727  *
 728  * \note This function should not be called within a peer cache iteration,
 729  *       because in some cases it can remove conflicting cache entries,
 730  *       which would invalidate the iterator.
 731  */
 732 void
 733 crm_update_peer_uname(crm_node_t *node, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
 734 {
 735     CRM_CHECK(uname != NULL,
 736               crm_err("Bug: can't update node name without name"); return);
 737     CRM_CHECK(node != NULL,
 738               crm_err("Bug: can't update node name to %s without node", uname);
 739               return);
 740 
 741     if (pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
 742         crm_debug("Node uname '%s' did not change", uname);
 743         return;
 744     }
 745 
 746     for (const char *c = uname; *c; ++c) {
 747         if ((*c >= 'A') && (*c <= 'Z')) {
 748             crm_warn("Node names with capitals are discouraged, consider changing '%s'",
 749                      uname);
 750             break;
 751         }
 752     }
 753 
 754     free(node->uname);
 755     node->uname = strdup(uname);
 756     CRM_ASSERT(node->uname != NULL);
 757 
 758     if (crm_status_callback) {
 759         crm_status_callback(crm_status_uname, node, NULL);
 760     }
 761 
 762 #if SUPPORT_COROSYNC
 763     if (is_corosync_cluster() && !pcmk_is_set(node->flags, crm_remote_node)) {
 764         crm_remove_conflicting_peer(node);
 765     }
 766 #endif
 767 }
 768 
 769 /*!
 770  * \internal
 771  * \brief Update a node's process information (and potentially state)
 772  *
 773  * \param[in] source      Caller's function name (for log messages)
 774  * \param[in] node        Node object to update
 775  * \param[in] flag        Bitmask of new process information
 776  * \param[in] status      node status (online, offline, etc.)
 777  *
 778  * \return NULL if any node was reaped from peer caches, value of node otherwise
 779  *
 780  * \note If this function returns NULL, the supplied node object was likely
 781  *       freed and should not be used again. This function should not be
 782  *       called within a cache iteration if reaping is possible, otherwise
 783  *       reaping could invalidate the iterator.
 784  */
 785 crm_node_t *
 786 crm_update_peer_proc(const char *source, crm_node_t * node, uint32_t flag, const char *status)
     /* [previous][next][first][last][top][bottom][index][help] */
 787 {
 788     uint32_t last = 0;
 789     gboolean changed = FALSE;
 790 
 791     CRM_CHECK(node != NULL, crm_err("%s: Could not set %s to %s for NULL",
 792                                     source, peer2text(flag), status); return NULL);
 793 
 794     /* Pacemaker doesn't spawn processes on remote nodes */
 795     if (pcmk_is_set(node->flags, crm_remote_node)) {
 796         return node;
 797     }
 798 
 799     last = node->processes;
 800     if (status == NULL) {
 801         node->processes = flag;
 802         if (node->processes != last) {
 803             changed = TRUE;
 804         }
 805 
 806     } else if (pcmk__str_eq(status, ONLINESTATUS, pcmk__str_casei)) {
 807         if ((node->processes & flag) != flag) {
 808             node->processes = pcmk__set_flags_as(__func__, __LINE__,
 809                                                  LOG_TRACE, "Peer process",
 810                                                  node->uname, node->processes,
 811                                                  flag, "processes");
 812             changed = TRUE;
 813         }
 814 
 815     } else if (node->processes & flag) {
 816         node->processes = pcmk__clear_flags_as(__func__, __LINE__,
 817                                                LOG_TRACE, "Peer process",
 818                                                node->uname, node->processes,
 819                                                flag, "processes");
 820         changed = TRUE;
 821     }
 822 
 823     if (changed) {
 824         if (status == NULL && flag <= crm_proc_none) {
 825             crm_info("%s: Node %s[%u] - all processes are now offline", source, node->uname,
 826                      node->id);
 827         } else {
 828             crm_info("%s: Node %s[%u] - %s is now %s", source, node->uname, node->id,
 829                      peer2text(flag), status);
 830         }
 831 
 832         /* Call the client callback first, then update the peer state,
 833          * in case the node will be reaped
 834          */
 835         if (crm_status_callback) {
 836             crm_status_callback(crm_status_processes, node, &last);
 837         }
 838 
 839         /* The client callback shouldn't touch the peer caches,
 840          * but as a safety net, bail if the peer cache was destroyed.
 841          */
 842         if (crm_peer_cache == NULL) {
 843             return NULL;
 844         }
 845 
 846         if (crm_autoreap) {
 847             const char *peer_state = NULL;
 848 
 849             if (pcmk_is_set(node->processes, crm_get_cluster_proc())) {
 850                 peer_state = CRM_NODE_MEMBER;
 851             } else {
 852                 peer_state = CRM_NODE_LOST;
 853             }
 854             node = crm_update_peer_state(__func__, node, peer_state, 0);
 855         }
 856     } else {
 857         crm_trace("%s: Node %s[%u] - %s is unchanged (%s)", source, node->uname, node->id,
 858                   peer2text(flag), status);
 859     }
 860     return node;
 861 }
 862 
 863 void
 864 crm_update_peer_expected(const char *source, crm_node_t * node, const char *expected)
     /* [previous][next][first][last][top][bottom][index][help] */
 865 {
 866     char *last = NULL;
 867     gboolean changed = FALSE;
 868 
 869     CRM_CHECK(node != NULL, crm_err("%s: Could not set 'expected' to %s", source, expected);
 870               return);
 871 
 872     /* Remote nodes don't participate in joins */
 873     if (pcmk_is_set(node->flags, crm_remote_node)) {
 874         return;
 875     }
 876 
 877     last = node->expected;
 878     if (expected != NULL && !pcmk__str_eq(node->expected, expected, pcmk__str_casei)) {
 879         node->expected = strdup(expected);
 880         changed = TRUE;
 881     }
 882 
 883     if (changed) {
 884         crm_info("%s: Node %s[%u] - expected state is now %s (was %s)", source, node->uname, node->id,
 885                  expected, last);
 886         free(last);
 887     } else {
 888         crm_trace("%s: Node %s[%u] - expected state is unchanged (%s)", source, node->uname,
 889                   node->id, expected);
 890     }
 891 }
 892 
 893 /*!
 894  * \internal
 895  * \brief Update a node's state and membership information
 896  *
 897  * \param[in] source      Caller's function name (for log messages)
 898  * \param[in] node        Node object to update
 899  * \param[in] state       Node's new state
 900  * \param[in] membership  Node's new membership ID
 901  * \param[in] iter        If not NULL, pointer to node's peer cache iterator
 902  *
 903  * \return NULL if any node was reaped, value of node otherwise
 904  *
 905  * \note If this function returns NULL, the supplied node object was likely
 906  *       freed and should not be used again. This function may be called from
 907  *       within a peer cache iteration if the iterator is supplied.
 908  */
 909 static crm_node_t *
 910 crm_update_peer_state_iter(const char *source, crm_node_t * node, const char *state, uint64_t membership, GHashTableIter *iter)
     /* [previous][next][first][last][top][bottom][index][help] */
 911 {
 912     gboolean is_member;
 913 
 914     CRM_CHECK(node != NULL,
 915               crm_err("Could not set state for unknown host to %s"
 916                       CRM_XS " source=%s", state, source);
 917               return NULL);
 918 
 919     is_member = pcmk__str_eq(state, CRM_NODE_MEMBER, pcmk__str_casei);
 920     if (is_member) {
 921         node->when_lost = 0;
 922         if (membership) {
 923             node->last_seen = membership;
 924         }
 925     }
 926 
 927     if (state && !pcmk__str_eq(node->state, state, pcmk__str_casei)) {
 928         char *last = node->state;
 929 
 930         node->state = strdup(state);
 931         crm_notice("Node %s state is now %s " CRM_XS
 932                    " nodeid=%u previous=%s source=%s", node->uname, state,
 933                    node->id, (last? last : "unknown"), source);
 934         if (crm_status_callback) {
 935             crm_status_callback(crm_status_nstate, node, last);
 936         }
 937         free(last);
 938 
 939         if (crm_autoreap && !is_member
 940             && !pcmk_is_set(node->flags, crm_remote_node)) {
 941             /* We only autoreap from the peer cache, not the remote peer cache,
 942              * because the latter should be managed only by
 943              * crm_remote_peer_cache_refresh().
 944              */
 945             if(iter) {
 946                 crm_notice("Purged 1 peer with id=%u and/or uname=%s from the membership cache", node->id, node->uname);
 947                 g_hash_table_iter_remove(iter);
 948 
 949             } else {
 950                 reap_crm_member(node->id, node->uname);
 951             }
 952             node = NULL;
 953         }
 954 
 955     } else {
 956         crm_trace("Node %s state is unchanged (%s) " CRM_XS
 957                   " nodeid=%u source=%s", node->uname, state, node->id, source);
 958     }
 959     return node;
 960 }
 961 
 962 /*!
 963  * \brief Update a node's state and membership information
 964  *
 965  * \param[in] source      Caller's function name (for log messages)
 966  * \param[in] node        Node object to update
 967  * \param[in] state       Node's new state
 968  * \param[in] membership  Node's new membership ID
 969  *
 970  * \return NULL if any node was reaped, value of node otherwise
 971  *
 972  * \note If this function returns NULL, the supplied node object was likely
 973  *       freed and should not be used again. This function should not be
 974  *       called within a cache iteration if reaping is possible,
 975  *       otherwise reaping could invalidate the iterator.
 976  */
 977 crm_node_t *
 978 crm_update_peer_state(const char *source, crm_node_t * node, const char *state, uint64_t membership)
     /* [previous][next][first][last][top][bottom][index][help] */
 979 {
 980     return crm_update_peer_state_iter(source, node, state, membership, NULL);
 981 }
 982 
 983 /*!
 984  * \internal
 985  * \brief Reap all nodes from cache whose membership information does not match
 986  *
 987  * \param[in] membership  Membership ID of nodes to keep
 988  */
 989 void
 990 crm_reap_unseen_nodes(uint64_t membership)
     /* [previous][next][first][last][top][bottom][index][help] */
 991 {
 992     GHashTableIter iter;
 993     crm_node_t *node = NULL;
 994 
 995     crm_trace("Reaping unseen nodes...");
 996     g_hash_table_iter_init(&iter, crm_peer_cache);
 997     while (g_hash_table_iter_next(&iter, NULL, (gpointer *)&node)) {
 998         if (node->last_seen != membership) {
 999             if (node->state) {
1000                 /*
1001                  * Calling crm_update_peer_state_iter() allows us to
1002                  * remove the node from crm_peer_cache without
1003                  * invalidating our iterator
1004                  */
1005                 crm_update_peer_state_iter(__func__, node, CRM_NODE_LOST,
1006                                            membership, &iter);
1007 
1008             } else {
1009                 crm_info("State of node %s[%u] is still unknown",
1010                          node->uname, node->id);
1011             }
1012         }
1013     }
1014 }
1015 
1016 int
1017 crm_terminate_member(int nodeid, const char *uname, void *unused)
     /* [previous][next][first][last][top][bottom][index][help] */
1018 {
1019     /* Always use the synchronous, non-mainloop version */
1020     return stonith_api_kick(nodeid, uname, 120, TRUE);
1021 }
1022 
1023 int
1024 crm_terminate_member_no_mainloop(int nodeid, const char *uname, int *connection)
     /* [previous][next][first][last][top][bottom][index][help] */
1025 {
1026     return stonith_api_kick(nodeid, uname, 120, TRUE);
1027 }
1028 
1029 static crm_node_t *
1030 crm_find_known_peer(const char *id, const char *uname)
     /* [previous][next][first][last][top][bottom][index][help] */
1031 {
1032     GHashTableIter iter;
1033     crm_node_t *node = NULL;
1034     crm_node_t *by_id = NULL;
1035     crm_node_t *by_name = NULL;
1036 
1037     if (uname) {
1038         g_hash_table_iter_init(&iter, crm_known_peer_cache);
1039         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1040             if (node->uname && strcasecmp(node->uname, uname) == 0) {
1041                 crm_trace("Name match: %s = %p", node->uname, node);
1042                 by_name = node;
1043                 break;
1044             }
1045         }
1046     }
1047 
1048     if (id) {
1049         g_hash_table_iter_init(&iter, crm_known_peer_cache);
1050         while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
1051             if(strcasecmp(node->uuid, id) == 0) {
1052                 crm_trace("ID match: %s= %p", id, node);
1053                 by_id = node;
1054                 break;
1055             }
1056         }
1057     }
1058 
1059     node = by_id; /* Good default */
1060     if (by_id == by_name) {
1061         /* Nothing to do if they match (both NULL counts) */
1062         crm_trace("Consistent: %p for %s/%s", by_id, id, uname);
1063 
1064     } else if (by_id == NULL && by_name) {
1065         crm_trace("Only one: %p for %s/%s", by_name, id, uname);
1066 
1067         if (id) {
1068             node = NULL;
1069 
1070         } else {
1071             node = by_name;
1072         }
1073 
1074     } else if (by_name == NULL && by_id) {
1075         crm_trace("Only one: %p for %s/%s", by_id, id, uname);
1076 
1077         if (uname) {
1078             node = NULL;
1079         }
1080 
1081     } else if (uname && by_id->uname
1082                && pcmk__str_eq(uname, by_id->uname, pcmk__str_casei)) {
1083         /* Multiple nodes have the same uname in the CIB.
1084          * Return by_id. */
1085 
1086     } else if (id && by_name->uuid
1087                && pcmk__str_eq(id, by_name->uuid, pcmk__str_casei)) {
1088         /* Multiple nodes have the same id in the CIB.
1089          * Return by_name. */
1090         node = by_name;
1091 
1092     } else {
1093         node = NULL;
1094     }
1095 
1096     if (node == NULL) {
1097         crm_debug("Couldn't find node%s%s%s%s",
1098                    id? " " : "",
1099                    id? id : "",
1100                    uname? " with name " : "",
1101                    uname? uname : "");
1102     }
1103 
1104     return node;
1105 }
1106 
1107 static void
1108 known_peer_cache_refresh_helper(xmlNode *xml_node, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
1109 {
1110     const char *id = crm_element_value(xml_node, XML_ATTR_ID);
1111     const char *uname = crm_element_value(xml_node, XML_ATTR_UNAME);
1112     crm_node_t * node =  NULL;
1113 
1114     CRM_CHECK(id != NULL && uname !=NULL, return);
1115     node = crm_find_known_peer(id, uname);
1116 
1117     if (node == NULL) {
1118         char *uniqueid = crm_generate_uuid();
1119 
1120         node = calloc(1, sizeof(crm_node_t));
1121         CRM_ASSERT(node != NULL);
1122 
1123         node->uname = strdup(uname);
1124         CRM_ASSERT(node->uname != NULL);
1125 
1126         node->uuid = strdup(id);
1127         CRM_ASSERT(node->uuid != NULL);
1128 
1129         g_hash_table_replace(crm_known_peer_cache, uniqueid, node);
1130 
1131     } else if (pcmk_is_set(node->flags, crm_node_dirty)) {
1132         if (!pcmk__str_eq(uname, node->uname, pcmk__str_casei)) {
1133             free(node->uname);
1134             node->uname = strdup(uname);
1135             CRM_ASSERT(node->uname != NULL);
1136         }
1137 
1138         /* Node is in cache and hasn't been updated already, so mark it clean */
1139         pcmk__clear_peer_flags(node, crm_node_dirty);
1140     }
1141 
1142 }
1143 
/* XPath matching each XML_CIB_TAG_NODE element below
 * XML_TAG_CIB/XML_CIB_TAG_CONFIGURATION/XML_CIB_TAG_NODES whose "type"
 * attribute is either absent or equal to "member"
 */
#define XPATH_MEMBER_NODE_CONFIG \
    "//" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_NODES \
    "/" XML_CIB_TAG_NODE "[not(@type) or @type='member']"
1147 
1148 static void
1149 crm_known_peer_cache_refresh(xmlNode *cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1150 {
1151     crm_peer_init();
1152 
1153     g_hash_table_foreach(crm_known_peer_cache, mark_dirty, NULL);
1154 
1155     crm_foreach_xpath_result(cib, XPATH_MEMBER_NODE_CONFIG,
1156                              known_peer_cache_refresh_helper, NULL);
1157 
1158     /* Remove all old cache entries that weren't seen in the CIB */
1159     g_hash_table_foreach_remove(crm_known_peer_cache, is_dirty, NULL);
1160 }
1161 
1162 void
1163 crm_peer_caches_refresh(xmlNode *cib)
     /* [previous][next][first][last][top][bottom][index][help] */
1164 {
1165     crm_remote_peer_cache_refresh(cib);
1166     crm_known_peer_cache_refresh(cib);
1167 }
1168 
1169 crm_node_t *
1170 crm_find_known_peer_full(unsigned int id, const char *uname, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
1171 {
1172     crm_node_t *node = NULL;
1173     char *id_str = NULL;
1174 
1175     CRM_ASSERT(id > 0 || uname != NULL);
1176 
1177     node = crm_find_peer_full(id, uname, flags);
1178 
1179     if (node || !(flags & CRM_GET_PEER_CLUSTER)) {
1180         return node;
1181     }
1182 
1183     if (id > 0) {
1184         id_str = crm_strdup_printf("%u", id);
1185     }
1186 
1187     node = crm_find_known_peer(id_str, uname);
1188 
1189     free(id_str);
1190     return node;
1191 }

/* [previous][next][first][last][top][bottom][index][help] */