Skip to content

Commit e67e6cc

Browse files
Merge branch 'develop' of github.com:NHSDigital/NRLF into NRL-1841-refresh-account-wide-infra
2 parents b4916f8 + a711f5d commit e67e6cc

22 files changed

Lines changed: 716 additions & 450 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,3 +79,4 @@ allure-report/*
7979

8080
# Performance test ref data
8181
tests/performance/reference-data.json
82+
tests/performance/producer/expanded_pointer_distributions.json

Makefile

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ HOST ?= $(TF_WORKSPACE_NAME).api.record-locator.$(ENV).national.nhs.uk
1717
ENV_TYPE ?= $(ENV)
1818
PERFTEST_TABLE_NAME ?= perftest
1919
PERFTEST_HOST ?= perftest-1.perftest.record-locator.national.nhs.uk
20+
PERFTEST_PATIENTS_WITH_POINTERS ?= 0
21+
PERFTEST_POINTERS_PER_PATIENT ?= 0
22+
PERFTEST_TYPE_DIST_PROFILE ?= default
23+
PERFTEST_CUSTODIAN_DIST_PROFILE ?= default
2024

2125
export PATH := $(PATH):$(PWD)/.venv/bin
2226
export USE_SHARED_RESOURCES := $(shell poetry run python scripts/are_resources_shared_for_stack.py $(TF_WORKSPACE_NAME))
@@ -249,31 +253,39 @@ generate-models: check-warn ## Generate Pydantic Models
249253
--output-model-type "pydantic_v2.BaseModel"
250254

251255

252-
generate-perftest-permissions: ## Generate perftest permissions and add to nrlf_permissions
253-
poetry run python tests/performance/producer/generate_permissions.py --output_dir="$(DIST_PATH)/nrlf_permissions/K6PerformanceTest"
256+
perftest-generate-permissions: ## Generate perftest permissions and add to nrlf_permissions
257+
PYTHONPATH=. poetry run python tests/performance/producer/generate_permissions.py --output_dir="$(DIST_PATH)/nrlf_permissions/K6PerformanceTest"
254258

255-
perftest-producer:
259+
perftest-seed-tables: ## Seed tables and upload generated perftest input files to s3
260+
@echo "Seeding performance test pointer tables with ENV=$(ENV) and PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and PERFTEST_PATIENTS_WITH_POINTERS=$(PERFTEST_PATIENTS_WITH_POINTERS) and PERFTEST_POINTERS_PER_PATIENT=$(PERFTEST_POINTERS_PER_PATIENT) and PERFTEST_TYPE_DIST_PROFILE=$(PERFTEST_TYPE_DIST_PROFILE) and PERFTEST_CUSTODIAN_DIST_PROFILE=$(PERFTEST_CUSTODIAN_DIST_PROFILE)"
261+
rm -rf "${DIST_PATH}/nft"
262+
mkdir -p "${DIST_PATH}/nft"
263+
PYTHONPATH=. poetry run python ./scripts/seed_nft_tables.py --table_name=$(PERFTEST_TABLE_NAME) --patients_with_pointers=$(PERFTEST_PATIENTS_WITH_POINTERS) --pointers_per_patient=$(PERFTEST_POINTERS_PER_PATIENT) --type_dist_profile=$(PERFTEST_TYPE_DIST_PROFILE) --custodian_dist_profile=$(PERFTEST_CUSTODIAN_DIST_PROFILE)
264+
zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
265+
aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"
266+
267+
perftest-prepare: ## Prepare input files for producer & consumer perf tests
268+
@echo "Preparing performance tests with ENV=$(ENV) and PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
269+
rm -rf "${DIST_PATH}/nft"
270+
mkdir -p "${DIST_PATH}/nft"
271+
aws s3 cp "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip"
272+
unzip "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip"
273+
# cp "${DIST_PATH}/nft/seed-pointers-extract-${PERFTEST_TABLE_NAME}.csv" "${DIST_PATH}/seed-pointers-extract.csv"
274+
PYTHONPATH=. poetry run python ./tests/performance/generate_producer_distributions.py
275+
276+
perftest-producer: ## Run producer perf tests
256277
@echo "Running producer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
257278
k6 run tests/performance/producer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
258279

259-
perftest-consumer:
280+
perftest-consumer: ## Run consumer perf tests
260281
@echo "Running consumer performance tests with HOST=$(PERFTEST_HOST) and ENV_TYPE=$(ENV_TYPE) and DIST_PATH=$(DIST_PATH)"
261282
k6 run tests/performance/consumer/perftest.js -e HOST=$(PERFTEST_HOST) -e ENV_TYPE=$(ENV_TYPE) -e DIST_PATH=$(DIST_PATH)
262283

263-
perftest-prep-generate-producer-data:
264-
@echo "Generating producer reference with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
265-
mkdir -p $(DIST_PATH)
266-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_producer_data --output_dir="$(DIST_PATH)"
267-
268-
perftest-prep-extract-consumer-data:
269-
@echo "Generating consumer reference with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
270-
mkdir -p $(DIST_PATH)
271-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py extract_consumer_data --output_dir="$(DIST_PATH)"
272-
273-
perftest-prep-generate-pointer-table-extract:
284+
perftest-generate-pointer-table-extract: ## Refresh the perf test input files in s3. Can be expensive to run on large tables
274285
@echo "Generating pointer table extract with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
275-
mkdir -p $(DIST_PATH)
276-
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="$(DIST_PATH)"
277-
278-
perftest-prepare: perftest-prep-generate-producer-data perftest-prep-extract-consumer-data perftest-prep-generate-pointer-table-extract
279-
@echo "Prepared performance tests with PERFTEST_TABLE_NAME=$(PERFTEST_TABLE_NAME) and DIST_PATH=$(DIST_PATH)"
286+
rm -rf "${DIST_PATH}/nft"
287+
mkdir -p "${DIST_PATH}/nft"
288+
PYTHONPATH=. poetry run python tests/performance/perftest_environment.py generate_pointer_table_extract --output_dir="${DIST_PATH}/nft"
289+
./scripts/get-current-info.sh > "${DIST_PATH}/nft/info.json"
290+
zip -r "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "${DIST_PATH}/nft"
291+
aws s3 cp "${DIST_PATH}/pointer_extract-${PERFTEST_TABLE_NAME}.zip" "s3://nhsd-nrlf--${ENV}-metadata/performance/seed-pointers-extract-${PERFTEST_TABLE_NAME}.zip"

scripts/seed_nft_tables.py

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import csv
2+
import os
23
from datetime import datetime, timedelta, timezone
34
from itertools import cycle
45
from math import gcd
@@ -7,10 +8,9 @@
78

89
import boto3
910
import fire
10-
11-
# import json
1211
import numpy as np
1312

13+
from nrlf.core.boto import get_s3_client
1414
from nrlf.core.constants import (
1515
CATEGORY_ATTRIBUTES,
1616
SNOMED_SYSTEM_URL,
@@ -20,12 +20,16 @@
2020
from nrlf.core.dynamodb.model import DocumentPointer
2121
from nrlf.core.logger import logger
2222
from nrlf.tests.data import load_document_reference
23+
from tests.performance.perftest_environment import create_extract_metadata_file
2324
from tests.performance.seed_data_constants import ( # DEFAULT_COUNT_DISTRIBUTIONS,
2425
CHECKSUM_WEIGHTS,
2526
CUSTODIAN_DISTRIBUTION_PROFILES,
2627
TYPE_DISTRIBUTION_PROFILES,
2728
)
2829

30+
dist_path = os.getenv("DIST_PATH", "./dist")
31+
nft_dist_path = f"{dist_path}/nft"
32+
2933
dynamodb = boto3.client("dynamodb")
3034
resource = boto3.resource("dynamodb")
3135

@@ -83,35 +87,56 @@ def _make_seed_pointer(
8387
return nft_pointer
8488

8589

90+
def _write_pointer_extract_to_file(table_name, pointer_data):
91+
local_csv_out = f"{nft_dist_path}/seed-pointers-extract.csv"
92+
local_meta_out = f"{nft_dist_path}/info.json"
93+
94+
print(f"writing pointer extract to files {local_csv_out} {local_meta_out}")
95+
96+
with open(local_csv_out, "w") as file:
97+
writer = csv.writer(file)
98+
writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"])
99+
writer.writerows(pointer_data)
100+
print(f"Pointer data saved to {local_csv_out}")
101+
102+
create_extract_metadata_file(table_name, nft_dist_path)
103+
104+
86105
def _populate_seed_table(
87106
table_name: str,
88-
px_with_pointers: int,
89-
pointers_per_px: float = 1.0,
107+
patients_with_pointers: int,
108+
pointers_per_patient: float = 1.0,
90109
type_dist_profile: str = "default",
91110
custodian_dist_profile: str = "default",
92111
):
93112
"""
94113
Seeds a table with example data for non-functional testing.
95114
"""
96-
if pointers_per_px < 1.0:
115+
if pointers_per_patient < 1.0:
97116
raise ValueError("Cannot populate table with patients with zero pointers")
98117

118+
print(
119+
f"Populating table {table_name} with patients_with_pointers={patients_with_pointers} pointers_per_patient={pointers_per_patient}",
120+
type_dist_profile,
121+
custodian_dist_profile,
122+
)
123+
99124
type_dists = TYPE_DISTRIBUTION_PROFILES[type_dist_profile]
100125
custodian_dists = CUSTODIAN_DISTRIBUTION_PROFILES[custodian_dist_profile]
101126

102127
# set up iterations
103128
type_iter = _set_up_cyclical_iterator(type_dists)
104129
custodian_iters = _set_up_custodian_iterators(custodian_dists)
105130
count_iter = _get_pointer_count_poisson_distributions(
106-
px_with_pointers, pointers_per_px
131+
patients_with_pointers, pointers_per_patient
107132
)
108133
testnum_cls = TestNhsNumbersIterator()
109134
testnum_iter = iter(testnum_cls)
110135

111-
px_counter = 0
112-
doc_ref_target = int(pointers_per_px * px_with_pointers)
136+
patient_counter = 0
137+
doc_ref_target = int(pointers_per_patient * patients_with_pointers)
113138
print(
114-
f"Will upsert ~{doc_ref_target} test pointers for {px_with_pointers} patients."
139+
f"Will upsert ~{doc_ref_target} test pointers for {patients_with_pointers} patients."
115140
)
116141
doc_ref_counter = 0
117142
batch_counter = 0
@@ -120,12 +145,15 @@ def _populate_seed_table(
120145
pointer_data: list[list[str]] = []
121146

122147
start_time = datetime.now(tz=timezone.utc)
123-
124148
batch_upsert_items: list[dict[str, Any]] = []
125-
while px_counter < px_with_pointers:
126-
pointers_for_px = int(next(count_iter))
127149

128-
if batch_counter + pointers_for_px > 25 or px_counter == px_with_pointers:
150+
while patient_counter <= patients_with_pointers:
151+
pointers_for_patient = int(next(count_iter))
152+
153+
if (
154+
batch_counter + pointers_for_patient > 25
155+
or patient_counter == patients_with_pointers
156+
):
129157
response = resource.batch_write_item(
130158
RequestItems={table_name: batch_upsert_items}
131159
)
@@ -138,45 +166,43 @@ def _populate_seed_table(
138166
batch_upsert_items = []
139167
batch_counter = 0
140168

141-
new_px = next(testnum_iter)
142-
for _ in range(pointers_for_px):
169+
new_patient = next(testnum_iter)
170+
for _ in range(pointers_for_patient):
143171
new_type = next(type_iter)
144172
new_custodian = next(custodian_iters[new_type])
145173
doc_ref_counter += 1
146174
batch_counter += 1
147175

148176
pointer = _make_seed_pointer(
149-
new_type, new_custodian, new_px, doc_ref_counter
177+
new_type, new_custodian, new_patient, doc_ref_counter
150178
)
151179
put_req = {"PutRequest": {"Item": pointer.model_dump()}}
152180
batch_upsert_items.append(put_req)
153181
pointer_data.append(
154182
[
155183
pointer.id,
156-
pointer.type,
184+
new_type, # not full type url
157185
pointer.custodian,
158186
pointer.nhs_number,
159187
]
160188
)
161-
px_counter += 1
189+
patient_counter += 1
162190

163-
if px_counter % 1000 == 0:
191+
if patient_counter % 1000 == 0:
164192
print(".", end="", flush=True)
165-
if px_counter % 100000 == 0:
166-
print(f" {px_counter} patients processed ({doc_ref_counter} pointers).")
193+
if patient_counter % 100000 == 0:
194+
print(
195+
f" {patient_counter} patients processed ({doc_ref_counter} pointers)."
196+
)
167197

168-
print(" Done.")
198+
print("Done")
169199

170200
end_time = datetime.now(tz=timezone.utc)
171201
print(
172202
f"Created {doc_ref_counter} pointers in {timedelta.total_seconds(end_time - start_time)} seconds (unprocessed: {unprocessed_count})."
173203
)
174204

175-
with open("./dist/seed-nft-pointers.csv", "w") as f:
176-
writer = csv.writer(f)
177-
writer.writerow(["pointer_id", "pointer_type", "custodian", "nhs_number"])
178-
writer.writerows(pointer_data)
179-
print(f"Pointer data saved to ./dist/seed-nft-pointers.csv") # noqa
205+
_write_pointer_extract_to_file(table_name, pointer_data)
180206

181207

182208
def _set_up_cyclical_iterator(dists: dict[str, int]) -> Iterator[str]:

terraform/bastion/Makefile

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
2+
ENV ?= dev
3+
TF_ARGS ?=
4+
ENV_ACCOUNT_NAME ?= $(shell ../../scripts/get-account-name-for-env.sh $(ENV))
5+
ENV_ACCOUNT_ID ?= $(shell aws secretsmanager get-secret-value --secret-id nhsd-nrlf--mgmt--$(ENV_ACCOUNT_NAME)-account-id --query SecretString --output text)
6+
BASTION_VPC_NAME ?= nhsd-nrlf--$(ENV_ACCOUNT_NAME)-vpc
7+
BASTION_SUBNET_NAME ?= nhsd-nrlf--$(ENV_ACCOUNT_NAME)-privsubnet
8+
POINTERS_TABLE_NAME ?= nhsd-nrlf--$(ENV)-pointers-table
9+
S3_METADATA_BUCKET_NAME ?= nhsd-nrlf--$(ENV)-metadata
10+
11+
export ENV ENV_ACCOUNT_NAME POINTERS_TABLE_NAME
12+
13+
help: ## Show this help message
14+
@echo "Usage: make [target]"
15+
@echo
16+
@echo "where [target] can be:"
17+
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf " \033[36m%-30s\033[0m %s\n", $$1, $$2}'
18+
19+
plan-ro: ## Plan the Terraform changes for a bastion that can perform read-only operations
20+
terraform plan \
21+
-var assume_account=$(ENV_ACCOUNT_ID) \
22+
-var assume_role=terraform \
23+
-var vpc_name=$(BASTION_VPC_NAME) \
24+
-var subnet_name=$(BASTION_SUBNET_NAME) \
25+
-var dynamodb_table_name=$(POINTERS_TABLE_NAME) \
26+
-var s3_metadata_bucket_name=$(S3_METADATA_BUCKET_NAME) \
27+
$(TF_ARGS) \
28+
-out=./bastion.tfplan
29+
30+
plan-rw: ## Plan the Terraform changes for a bastion that can perform write operations
31+
terraform plan \
32+
-var assume_account=$(ENV_ACCOUNT_ID) \
33+
-var assume_role=terraform \
34+
-var vpc_name=$(BASTION_VPC_NAME) \
35+
-var subnet_name=$(BASTION_SUBNET_NAME) \
36+
-var dynamodb_table_name=$(POINTERS_TABLE_NAME) \
37+
-var allow_dynamodb_table_write=true \
38+
-var s3_metadata_bucket_name=$(S3_METADATA_BUCKET_NAME) \
39+
$(TF_ARGS) \
40+
-out=./bastion.tfplan
41+
42+
destroy: ## Destroy the bastion
43+
terraform destroy \
44+
-var assume_account=$(ENV_ACCOUNT_ID) \
45+
-var assume_role=terraform \
46+
-var vpc_name=$(BASTION_VPC_NAME) \
47+
-var subnet_name=$(BASTION_SUBNET_NAME) \
48+
-var dynamodb_table_name=$(POINTERS_TABLE_NAME) \
49+
-var s3_metadata_bucket_name=$(S3_METADATA_BUCKET_NAME) \
50+
$(TF_ARGS)
51+
52+
ssh-connection: ## Connect to the bastion via SSH
53+
@echo "Connecting to bastion via SSM connect...."
54+
AWS_ACCOUNT_ID=$(ENV_ACCOUNT_ID) \
55+
AWS_ROLE_NAME=terraform \
56+
./scripts/start-bastion-connection.sh $(shell terraform output -raw instance_id)

0 commit comments

Comments (0)