diff --git a/osism/tasks/conductor/sonic/config_generator.py b/osism/tasks/conductor/sonic/config_generator.py index 839bc414..012fcda0 100644 --- a/osism/tasks/conductor/sonic/config_generator.py +++ b/osism/tasks/conductor/sonic/config_generator.py @@ -83,6 +83,42 @@ "VERSIONS", ) +# Tables inherited from the device's image-provided base config_db.json: +# their existing content is preserved across regen because the generator does +# not emit it itself (see the ownership model on generate_sonic_config). +INHERITED_TABLE_KEYS = ("DEVICE_METADATA", "VERSIONS") + +# Owned tables that are also scaffolded: every scaffold key except the +# inherited ones. The orchestrator setdefault-creates these up front, so +# downstream helpers can index into them unconditionally. +SCAFFOLDED_OWNED_TABLE_KEYS = tuple( + key for key in TOP_LEVEL_SCAFFOLD_KEYS if key not in INHERITED_TABLE_KEYS +) + +# Owned tables that are not scaffolded: helpers create and assign them +# directly rather than through the scaffold setdefault. Most are written only +# when NetBox carries the corresponding data, so when that data is absent the +# table is simply left out of the generated config. The exception is +# SNMP_SERVER, which _add_snmp_configuration always emits with hardcoded +# defaults (location "Data Center", contact "info@example.com") even when the +# device has no SNMP data in NetBox. +ON_DEMAND_OWNED_TABLE_KEYS = ( + "ROUTE_REDISTRIBUTE", + "SNMP_SERVER", + "SNMP_AGENT_ADDRESS_CONFIG", + "SNMP_SERVER_GROUP_MEMBER", + "SNMP_SERVER_USER", + "SNMP_SERVER_PARAMS", + "SNMP_SERVER_TARGET", + "SYSLOG_SERVER", +) + +# Tables fully owned by the generator and rebuilt from scratch on every regen. +# Their base content is dropped up front so entries removed from NetBox (e.g. +# a deleted VLAN_MEMBER, VRF, VXLAN_TUNNEL_MAP, or SNMP user) cannot survive as +# stale config. +OWNED_TABLE_KEYS = SCAFFOLDED_OWNED_TABLE_KEYS + ON_DEMAND_OWNED_TABLE_KEYS + def natural_sort_key(port_name): """Extract numeric part from port name for natural sorting.""" @@ -101,6 +137,27 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version= Returns: dict: Minimal SONiC configuration dictionary + + Config ownership model: + This generator owns the tables listed in OWNED_TABLE_KEYS; operators + must not make manual adjustments to them. On every regen those tables + are rebuilt from scratch from NetBox data and hardcoded SONiC policy: + their base content is dropped up front, so neither pre-existing values + nor entries removed from NetBox survive. Operator customizations to + owned tables must be modeled in NetBox or expressed as generator + policy, not applied directly to config_db.json. + + Tables that are neither owned nor inherited are not emitted by the + generator and pass through unchanged from the device's base + config_db.json. The generator does not currently police them, so they + are not a supported place for operator customizations either. + + The generator builds on the device's image-provided base + config_db.json. A few fields it does not itself emit are inherited + from that base rather than regenerated — currently the + DEVICE_METADATA localhost device attributes (e.g. the device type) + and the DATABASE schema VERSION. These come from the image, not from + operator hand-edits. """ # Get port configuration for the HWSKU port_config = get_port_config(hwsku) @@ -191,6 +248,13 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version= # Ensure we start fresh even on error config = {} + # Drop any owned-table content carried over from the on-disk base config. + # These tables are fully regenerated below from NetBox data and SONiC + # policy, so entries removed from NetBox must not survive as stale config. + # The inherited tables (DEVICE_METADATA, VERSIONS) keep their base content. + for _owned_key in OWNED_TABLE_KEYS: + config.pop(_owned_key, None) + # Ensure the top-level scaffold keys the orchestrator and downstream # helpers index into directly are always present, even when the on-disk # base config is missing or only partially populated. @@ -218,9 +282,11 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version= primary_ip = str(device.primary_ip6.address).split("/")[0] if primary_ip: - if "default" not in config["BGP_GLOBALS"]: - config["BGP_GLOBALS"]["default"] = {} - config["BGP_GLOBALS"]["default"]["router_id"] = primary_ip + # BGP_GLOBALS is a generated section fully owned by this generator, so + # replace the default VRF entry wholesale rather than merging into a + # pre-existing one — pre-existing fields from config_db.json must not + # survive regen (see the ownership model in the docstring). + config["BGP_GLOBALS"]["default"] = {"router_id": primary_ip} # Calculate and add local_asn from router_id (only for IPv4) if device.primary_ip4: @@ -300,8 +366,6 @@ def generate_sonic_config(device, hwsku, device_as_mapping=None, config_version= config["MGMT_INTERFACE"]["eth0"] = {"admin_status": "up"} config["MGMT_INTERFACE"][f"eth0|{oob_ip}/{prefix_len}"] = {} metalbox_ip = _get_metalbox_ip_for_device(device) - # Write into the existing STATIC_ROUTE dict so any pre-existing - # routes loaded from /etc/sonic/config_db.json survive. config["STATIC_ROUTE"]["mgmt|0.0.0.0/0"] = {"nexthop": metalbox_ip} else: oob_ip = None diff --git a/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py b/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py index 99533a16..be149e67 100644 --- a/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py +++ b/tests/unit/tasks/conductor/sonic/test_config_generator_orchestrator.py @@ -15,6 +15,7 @@ from osism.tasks.conductor.sonic import config_generator from osism.tasks.conductor.sonic.config_generator import ( + OWNED_TABLE_KEYS, TOP_LEVEL_SCAFFOLD_KEYS, clear_all_caches, clear_metalbox_devices_cache, @@ -352,15 +353,16 @@ def test_generate_sonic_config_populates_mgmt_interface_and_static_route( assert snmp_oob == "10.42.0.5" -def test_generate_sonic_config_oob_path_preserves_existing_static_routes( +def test_generate_sonic_config_static_route_dropped_on_regen( mocker, patch_orchestrator_helpers, make_orchestrator_device ): - """Pre-existing ``STATIC_ROUTE`` entries must survive the OOB path. + """Pre-existing ``STATIC_ROUTE`` entries must be dropped on regen. - The OOB branch writes the management default route into ``STATIC_ROUTE``; - any routes loaded from ``/etc/sonic/config_db.json`` (e.g. an admin's - custom blackhole or VRF route) must not be silently dropped when the - branch fires. + Per the ownership model, STATIC_ROUTE is a generated section fully owned + by the generator: it is reset on each regen, so routes loaded from + ``/etc/sonic/config_db.json`` (e.g. an operator's custom blackhole or VRF + route) do not survive. The OOB branch then writes the management default + route as the only entry. """ base = make_base_config() base["STATIC_ROUTE"] = { @@ -374,9 +376,7 @@ def test_generate_sonic_config_oob_path_preserves_existing_static_routes( config = generate_sonic_config(device, "HWSKU") - assert config["STATIC_ROUTE"]["mgmt|10.0.0.0/8"] == {"nexthop": "192.0.2.1"} - assert config["STATIC_ROUTE"]["default|198.51.100.0/24"] == {"blackhole": "true"} - assert config["STATIC_ROUTE"]["mgmt|0.0.0.0/0"] == {"nexthop": "10.42.0.1"} + assert config["STATIC_ROUTE"] == {"mgmt|0.0.0.0/0": {"nexthop": "10.42.0.1"}} def test_generate_sonic_config_no_oob_ip_leaves_mgmt_empty_and_passes_none( @@ -465,6 +465,108 @@ def test_generate_sonic_config_version_existing_in_base_preserved( assert config["VERSIONS"]["DATABASE"]["VERSION"] == "version_4_5_0" +# --------------------------------------------------------------------------- +# generate_sonic_config — ownership model: BGP_GLOBALS["default"] +# --------------------------------------------------------------------------- + + +def test_generate_sonic_config_bgp_globals_default_extra_fields_dropped_on_regen( + mocker, patch_orchestrator_helpers, make_orchestrator_device +): + """Pre-existing BGP_GLOBALS['default'] fields must be dropped on regen. + + Per the ownership model, BGP_GLOBALS is a generated section: entries + are unconditionally overwritten from NetBox data and hardcoded policy, + so pre-existing fields from /etc/sonic/config_db.json must not survive. + + The orchestrator replaces BGP_GLOBALS['default'] wholesale rather than + merging into a pre-existing entry, so the default VRF follows the same + rule as every other generated section. + """ + base = make_base_config() + base["BGP_GLOBALS"]["default"] = { + "router_id": "192.0.2.1", + "local_asn": "4200000001", + "custom_timer": "operator-value", # not produced by the generator + } + patch_base_config(mocker, base_config=base) + device = make_orchestrator_device(primary_ip4=_ip("10.0.0.1/32")) + + config = generate_sonic_config(device, "HWSKU") + + assert "custom_timer" not in config["BGP_GLOBALS"]["default"] + + +def test_generate_sonic_config_stale_owned_entries_dropped_on_regen( + mocker, patch_orchestrator_helpers, make_orchestrator_device +): + """Owned-table entries removed from NetBox must not survive regen. + + The section helpers are mocked here, so nothing repopulates the owned + tables: any entry present only because it was carried over from the base + config_db.json must be gone after regen. The inherited tables + (DEVICE_METADATA, VERSIONS) keep their base content. + """ + base = make_base_config() + # Stale entries an operator/earlier run left behind, now absent from NetBox. + base["BGP_GLOBALS"]["old-vrf"] = {"router_id": "1.1.1.1"} + base["VLAN_MEMBER"]["Vlan999|Ethernet0"] = {"tagging_mode": "tagged"} + base["VXLAN_TUNNEL_MAP"]["vtepServ|map_999"] = {"vlan": "Vlan999", "vni": "999"} + base["SNMP_SERVER_USER"] = {"olduser": {"shaKey": "x"}} + base["SYSLOG_SERVER"] = {"10.9.9.9": {"severity": "info"}} + # Inherited tables: must be preserved across regen. + base["DEVICE_METADATA"]["localhost"] = {"type": "LeafRouter"} + base["VERSIONS"] = {"DATABASE": {"VERSION": "version_4_5_0"}} + patch_base_config(mocker, base_config=base) + device = make_orchestrator_device(primary_ip4=_ip("10.0.0.1/32")) + + config = generate_sonic_config(device, "HWSKU", config_version=None) + + # Scaffolded owned tables are emptied; the orchestrator rewrites only the + # default VRF in BGP_GLOBALS. + assert config["BGP_GLOBALS"] == { + "default": {"router_id": "10.0.0.1", "local_asn": "4200000001"} + } + assert config["VLAN_MEMBER"] == {} + assert config["VXLAN_TUNNEL_MAP"] == {} + # On-demand owned tables are dropped entirely (no mocked helper recreates + # them). + assert "SNMP_SERVER_USER" not in config + assert "SYSLOG_SERVER" not in config + # Inherited tables survive untouched. + assert config["DEVICE_METADATA"]["localhost"]["type"] == "LeafRouter" + assert config["VERSIONS"]["DATABASE"]["VERSION"] == "version_4_5_0" + + +def test_generate_sonic_config_every_owned_table_drops_stale_entries( + mocker, patch_orchestrator_helpers, make_orchestrator_device +): + """Exhaustive counterpart to the sampled stale-drop test above. + + The sampled test reads well but only seeds a handful of tables. Here we + seed a sentinel entry into *every* OWNED_TABLE_KEYS table and assert none + of those sentinels survive regen, so the guarantee covers the whole owned + set and stays in sync with it, including future additions. + + We assert on the sentinel key, not on the table being empty/absent: the + orchestrator legitimately repopulates some owned tables itself (e.g. + BGP_GLOBALS gets the default VRF), so "table is empty" is the wrong + invariant. "The stale carry-over entry is gone" is the right one. + """ + sentinel = "__stale_sentinel__" + base = make_base_config() + for owned_key in OWNED_TABLE_KEYS: + # setdefault: on-demand owned tables are absent from the scaffold base. + base.setdefault(owned_key, {})[sentinel] = {"stale": "value"} + patch_base_config(mocker, base_config=base) + device = make_orchestrator_device(primary_ip4=_ip("10.0.0.1/32")) + + config = generate_sonic_config(device, "HWSKU", config_version=None) + + survivors = [key for key in OWNED_TABLE_KEYS if sentinel in config.get(key, {})] + assert survivors == [], f"stale entry survived in owned tables: {survivors}" + + # --------------------------------------------------------------------------- # generate_sonic_config — netbox_interfaces collection # --------------------------------------------------------------------------- diff --git a/tests/unit/tasks/conductor/sonic/test_config_generator_ownership.py b/tests/unit/tasks/conductor/sonic/test_config_generator_ownership.py new file mode 100644 index 00000000..860fc61b --- /dev/null +++ b/tests/unit/tasks/conductor/sonic/test_config_generator_ownership.py @@ -0,0 +1,252 @@ +# SPDX-License-Identifier: Apache-2.0 + +"""Tests that document and verify the SONiC config generator's ownership model. + +generate_sonic_config() starts from a deep copy of /etc/sonic/config_db.json. +The section helpers use unconditional assignment for the entries they own, so +pre-existing values in those entries are not preserved on regen. + +The tests in this file directly invoke the private helpers to show what the +ownership rule means in practice. See the generate_sonic_config() docstring +for the full ownership statement. + +The orchestrator follows the same rule for the entries it writes directly; see +test_generate_sonic_config_bgp_globals_default_extra_fields_dropped_on_regen in +test_config_generator_orchestrator.py for BGP_GLOBALS['default']. +""" + +from types import SimpleNamespace + +from osism.tasks.conductor.sonic.config_generator import ( + INHERITED_TABLE_KEYS, + ON_DEMAND_OWNED_TABLE_KEYS, + OWNED_TABLE_KEYS, + SCAFFOLDED_OWNED_TABLE_KEYS, + TOP_LEVEL_SCAFFOLD_KEYS, + _add_vlan_configuration, + _add_vrf_configuration, +) + + +def _empty_config(): + """Minimal config dict covering the sections these helpers can write.""" + return { + "VRF": {}, + "VLAN": {}, + "VLAN_INTERFACE": {}, + "VLAN_MEMBER": {}, + "BGP_GLOBALS": {}, + "BGP_GLOBALS_AF": {}, + "BGP_GLOBALS_ROUTE_ADVERTISE": {}, + "ROUTE_REDISTRIBUTE": {}, + "VXLAN_TUNNEL": {}, + "VXLAN_EVPN_NVO": {}, + "VXLAN_TUNNEL_MAP": {}, + "MGMT_INTERFACE": {"eth0|10.0.0.1/24": {"gwaddr": "10.0.0.254"}}, + } + + +def _device(name="leaf-1"): + return SimpleNamespace(name=name) + + +# --------------------------------------------------------------------------- +# _add_vrf_configuration +# --------------------------------------------------------------------------- + + +class TestVrfConfigurationOwnership: + """_add_vrf_configuration owns VRF-derived config entries outright.""" + + def test_bgp_globals_for_vrf_replaces_preexisting_entry(self): + """Pre-existing BGP_GLOBALS[vrf_name] is replaced wholesale on regen. + + Any operator-added fields not produced by the generator (e.g. custom + timer overrides) are silently dropped. + """ + config = _empty_config() + config["BGP_GLOBALS"]["default"] = { + "router_id": "10.0.0.1", + "local_asn": "4200000001", + } + config["BGP_GLOBALS"]["tenant-vrf"] = { + "router_id": "10.0.0.1", + "local_asn": "4200000001", + "custom_timer": "operator-value", + } + vrf_info = { + "vrfs": {"tenant-vrf": {"table_id": 100}}, + "interface_vrf_mapping": {}, + } + + _add_vrf_configuration(config, vrf_info, {}) + + # Entry must exactly match the deepcopy of BGP_GLOBALS["default"]. + # custom_timer must be absent — it is not derived from NetBox or policy. + assert config["BGP_GLOBALS"]["tenant-vrf"] == { + "router_id": "10.0.0.1", + "local_asn": "4200000001", + } + assert "custom_timer" not in config["BGP_GLOBALS"]["tenant-vrf"] + + def test_vlan_for_vni_vrf_replaces_preexisting_entry(self): + """Pre-existing VLAN[Vlan{vni}] is replaced wholesale on regen. + + Operator-added fields (e.g. a description) are silently dropped. + """ + config = _empty_config() + config["BGP_GLOBALS"]["default"] = {} + vni = 2001 + vlan_name = f"Vlan{vni}" + config["VLAN"][vlan_name] = { + "admin_status": "up", + "autostate": "enable", + "vlanid": str(vni), + "description": "do-not-modify", + } + vrf_info = { + "vrfs": {"tenant-vrf": {"vni": vni}}, + "interface_vrf_mapping": {}, + } + + _add_vrf_configuration(config, vrf_info, {}) + + assert config["VLAN"][vlan_name] == { + "admin_status": "up", + "autostate": "enable", + "vlanid": str(vni), + } + assert "description" not in config["VLAN"][vlan_name] + + def test_route_redistribute_key_is_reset_to_empty_dict(self): + """The generated ROUTE_REDISTRIBUTE key is always reset to {} on regen. + + Any operator-configured route policy under the generated key is + silently dropped. + """ + config = _empty_config() + config["BGP_GLOBALS"]["default"] = {} + key = "tenant-vrf|connected|bgp|ipv4" + config["ROUTE_REDISTRIBUTE"][key] = {"route_map": "RM-CUSTOM"} + vrf_info = { + "vrfs": {"tenant-vrf": {"vni": 3001}}, + "interface_vrf_mapping": {}, + } + + _add_vrf_configuration(config, vrf_info, {}) + + assert config["ROUTE_REDISTRIBUTE"][key] == {} + + def test_sections_not_owned_by_vrf_helper_pass_through_unchanged(self): + """Sections not written by _add_vrf_configuration are not disturbed.""" + config = _empty_config() + config["BGP_GLOBALS"]["default"] = {} + vrf_info = { + "vrfs": {"tenant-vrf": {}}, + "interface_vrf_mapping": {}, + } + + _add_vrf_configuration(config, vrf_info, {}) + + assert config["MGMT_INTERFACE"] == { + "eth0|10.0.0.1/24": {"gwaddr": "10.0.0.254"} + } + + +# --------------------------------------------------------------------------- +# _add_vlan_configuration +# --------------------------------------------------------------------------- + + +class TestVlanConfigurationOwnership: + """_add_vlan_configuration owns VLAN entries outright.""" + + def test_vlan_entry_replaces_preexisting_entry(self): + """Pre-existing VLAN[VlanX] is replaced wholesale on regen. + + Operator-added fields not produced by the generator are silently + dropped. + """ + config = _empty_config() + vid = 100 + vlan_name = f"Vlan{vid}" + config["VLAN"][vlan_name] = { + "admin_status": "up", + "autostate": "enable", + "members": [], + "vlanid": str(vid), + "description": "operator-managed", + } + vlan_info = { + "vlans": {vid: {}}, + "vlan_members": {}, + "vlan_interfaces": {}, + } + + _add_vlan_configuration(config, vlan_info, {}, _device()) + + assert config["VLAN"][vlan_name] == { + "admin_status": "up", + "autostate": "enable", + "members": [], + "vlanid": str(vid), + } + assert "description" not in config["VLAN"][vlan_name] + + +# --------------------------------------------------------------------------- +# Ownership-taxonomy invariants +# --------------------------------------------------------------------------- + + +class TestOwnershipTaxonomyInvariants: + """Guard the relationships between the table-classification constants. + + OWNED_TABLE_KEYS is derived from these sets, so most of the taxonomy is + correct by construction. These tests pin the parts that are *not* derived + -- the hand-written INHERITED_TABLE_KEYS and ON_DEMAND_OWNED_TABLE_KEYS + literals -- so an accidental edit to either fails here rather than silently + breaking the ownership model at runtime. + """ + + def test_scaffold_partitions_into_owned_and_inherited(self): + """The scaffold set is exactly its owned and inherited tables. + + Every scaffolded table is either rebuilt (scaffolded-owned) or + preserved (inherited), with nothing left unclassified and no inherited + table outside the scaffold set. The latter matters at runtime: the + orchestrator setdefault-creates only TOP_LEVEL_SCAFFOLD_KEYS and then + indexes into the inherited tables directly (e.g. + config["DEVICE_METADATA"]), so an inherited table missing from the + scaffold set would KeyError on a fresh base config. + """ + assert set(TOP_LEVEL_SCAFFOLD_KEYS) == ( + set(SCAFFOLDED_OWNED_TABLE_KEYS) | set(INHERITED_TABLE_KEYS) + ) + + def test_owned_and_inherited_are_disjoint(self): + """No table is both owned and inherited. + + Owned tables are dropped up front and rebuilt; inherited tables keep + their base content. A table in both sets would be dropped *and* + expected to survive -- the drop wins, silently breaking inheritance. + """ + assert set(OWNED_TABLE_KEYS).isdisjoint(INHERITED_TABLE_KEYS) + + def test_owned_table_keys_has_no_duplicates(self): + """OWNED_TABLE_KEYS lists each table once. + + A duplicate means an on-demand literal shadows a scaffolded key (or + vice versa), signalling the two classification sets have drifted into + overlap. + """ + assert len(OWNED_TABLE_KEYS) == len(set(OWNED_TABLE_KEYS)) + + def test_scaffolded_and_on_demand_owned_are_disjoint(self): + """The two owned sub-categories do not overlap. + + Scaffolded-owned tables are created up front; on-demand owned tables + are created only when NetBox carries their data. A table in both would + be miscategorised about how it comes into existence. + """ + assert set(SCAFFOLDED_OWNED_TABLE_KEYS).isdisjoint(ON_DEMAND_OWNED_TABLE_KEYS)