refactor: reorganize estimation and server scripts [code:qws] [system:MiyabiG]

yoshifuminakamura · yoshifuminakamura · commit 5d3c26e32f90 · 2026-04-06T20:36:30.000+09:00
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -207,7 +207,7 @@ trigger_benchpark_pipeline:
 generate_estimate_matrix:
   stage: generate
   script:
-    - bash scripts/generate_estimate_from_uuid.sh
+    - bash scripts/estimation/generate_reestimate_pipeline.sh
   tags:
     - general
   artifacts:
diff --git a/README.md b/README.md
@@ -77,14 +77,18 @@ benchkit/
 │   ├── job_functions.sh      # 共通関数定義（CSVパース、System_CSV検索）
 │   ├── bk_functions.sh       # FOM/SECTION/OVERLAP出力標準化関数
 │   ├── result.sh             # 結果JSON変換（SECTION/OVERLAP対応、pipeline_timing付加）
-│   ├── send_results.sh       # 結果転送（uuid/timestamp書き戻し）
 │   ├── record_timestamp.sh   # Unixエポックタイムスタンプ記録
 │   ├── collect_timing.sh     # パイプラインタイミング収集（build/queue/run時間）
-│   ├── estimate_common.sh    # 性能推定共通ライブラリ
-│   ├── run_estimate.sh       # 推定実行ラッパー
-│   ├── send_estimate.sh      # 推定結果転送
-│   ├── fetch_result_by_uuid.sh # UUID指定結果取得
-│   ├── generate_estimate_from_uuid.sh # UUID指定推定パイプライン生成
+│   ├── result_server/
+│   │   ├── api.sh            # result_server JSON取得共通
+│   │   ├── send_results.sh   # 結果転送（uuid/timestamp書き戻し）
+│   │   ├── send_estimate.sh  # 推定結果転送
+│   │   └── fetch_result_by_uuid.sh # UUID指定結果取得
+│   ├── estimation/
+│   │   ├── common.sh         # 性能推定共通ライブラリ
+│   │   ├── run.sh            # 推定実行ラッパー
+│   │   ├── test_reestimate.sh # 再推定確認ヘルパー
+│   │   └── generate_reestimate_pipeline.sh # UUID指定推定パイプライン生成
 │   ├── wait_for_nfs.sh       # NFS同期待機（現在コメントアウト中）
 │   └── test_submit.sh        # テスト実行用
 ├── .gitlab-ci.yml            # メインCI定義
@@ -225,7 +229,7 @@ python -m pytest tests/ -v
 - `run.sh` は `scripts/bk_functions.sh` を `source` し、`bk_emit_result` / `bk_emit_section` / `bk_emit_overlap` で標準化された結果出力を行う
 - `record_timestamp.sh` は run/build_run ジョブ（計算ノード上）でビルド・実行の開始/終了時刻を記録する
 - `collect_timing.sh` と `result.sh` は send_results ジョブ（Docker ランナー `fncx-curl-jq` 上）で実行される。`collect_timing.sh` で `pipeline_timing`（build/queue/run時間）を集計し、`result.sh` で結果をJSON形式に変換（`pipeline_timing` 情報を自動付加）する
-- `scripts/send_results.sh` で結果サーバに転送・性能推定トリガー
+- `scripts/result_server/send_results.sh` で結果サーバに転送・性能推定トリガー
 
 ### 3. 結果転送・保存
 - `results/result[0-9].json` を結果サーバに転送
@@ -238,7 +242,7 @@ python -m pytest tests/ -v
 
 - 推定対象システム: `ESTIMATE_SYSTEMS`（job_functions.sh で定義、例: MiyabiG, RC_GH200）
 - `estimate.sh` がアプリ固有の推定ロジックを実装（`programs/<code>/estimate.sh`）
-- `estimate_common.sh` が共通関数（API呼び出し、JSON出力等）を提供
+- `scripts/estimation/common.sh` が共通関数（API呼び出し、JSON出力等）を提供
 - 簡易推定と詳細推定の双方を将来的に受け入れられる設計を前提とする
 - UUID指定による再推定もサポート
   - `estimate_result_uuid` を指定すると、その estimate から `source_result_uuid` を引いて再推定
@@ -250,7 +254,7 @@ python -m pytest tests/ -v
 - `benchpark-bridge/scripts/ci_generator.sh` により `.gitlab-ci.benchpark.yml` を自動生成
 - `benchpark-bridge/scripts/runner.sh` でBenchPark（Spack/Ramble）を実行
 - `benchpark-bridge/scripts/result_converter.py` でRamble結果をBenchKit JSON形式に変換
-- 結果は `scripts/send_results.sh` で結果サーバに転送
+- 結果は `scripts/result_server/send_results.sh` で結果サーバに転送
 
 ---
 
diff --git a/benchpark-bridge/scripts/ci_generator.sh b/benchpark-bridge/scripts/ci_generator.sh
@@ -63,7 +63,7 @@ ${job_prefix}_send:
     - test -n \"\$RESULT_SERVER\" && echo \"RESULT_SERVER is set\" || echo \"RESULT_SERVER is NOT set\"
     - test -n \"\$RESULT_SERVER_KEY\" && echo \"RESULT_SERVER_KEY is set\" || echo \"RESULT_SERVER_KEY is NOT set\"
     - echo \"Sending results to server\"
-    - bash scripts/send_results.sh
+    - bash scripts/result_server/send_results.sh
 
 " >> "$output"
 }
@@ -156,4 +156,4 @@ ${job_prefix}_run:
 
 done < "$BENCHPARK_LIST"
 
-echo "BenchPark GitLab CI configuration generated: $OUTPUT_FILE"
+echo "BenchPark GitLab CI configuration generated: $OUTPUT_FILE"
diff --git a/docs/cx/BENCHKIT_GAP_ANALYSIS.md b/docs/cx/BENCHKIT_GAP_ANALYSIS.md
@@ -64,7 +64,7 @@ However, estimation is still not yet broadly deployed across multiple applicatio
 | ベンチマーク実行定義 | アプリごとの build/run/list を保持し、継続実行可能であること | `programs/*` に `build.sh` `run.sh` `list.csv`、一部 `estimate.sh` がある | 追加や修正がまだ人手中心。雛形生成や申請導線がない | 申請・承認・AI 連携の前提になる | 高 |
 | CI ジョブ生成 | system と queue 情報を使って CI 実行を生成すること | `matrix_generate.sh` と `job_functions.sh` が実装済み | 拠点接続の検証や onboarding 手順が未整理 | 拠点追加、予算管理、申請フォームの自動化に影響 | 高 |
 | 結果正規化 | `run.sh` 出力を Result JSON に正規化すること | `bk_emit_result`、`bk_emit_section`、`bk_emit_overlap`、`result.sh` が実装済み | app ごとの差異を自動検証する仕組みが弱い | 推定、可視化、AI 診断の入力品質に直結 | 高 |
-| 性能推定 | Result JSON から Estimate JSON を生成し、可視化可能であること | `estimate_common.sh`、`run_estimate.sh`、`send_estimate.sh`、`estimated` 画面あり。`qws` では軽量推定と詳細ダミー推定、section ごとの package 指定、補助データ参照、section-level fallback、requested/applied package 識別、top-level applicability end state、推定元 result と推定結果自体の UUID / timestamp 保持まで動作する | 横展開はまだ `qws` 中心。複数 detailed package の本実装、再推定比較運用、他 app への適用が未完成 | AI 駆動、将来機評価、継続的フィードバックの基盤になる | 最優先 |
+| 性能推定 | Result JSON から Estimate JSON を生成し、可視化可能であること | `scripts/estimation/common.sh`、`scripts/estimation/run.sh`、`scripts/result_server/send_estimate.sh`、`estimated` 画面あり。`qws` では軽量推定と詳細ダミー推定、section ごとの package 指定、補助データ参照、section-level fallback、requested/applied package 識別、top-level applicability end state、推定元 result と推定結果自体の UUID / timestamp 保持まで動作する | 横展開はまだ `qws` 中心。複数 detailed package の本実装、再推定比較運用、他 app への適用が未完成 | AI 駆動、将来機評価、継続的フィードバックの基盤になる | 最優先 |
 | 推定結果表示 | Estimate JSON を一覧・詳細で表示できること | `result_server/routes/estimated.py` とテンプレートが実装済み。requested/applied package、applicability、estimate UUID の基本表示も入っている | section / overlap 単位の package applicability や `not_applicable` 詳細の見せ方、比較 UI がまだ弱い | 推定運用を本格化すると重要度が上がる | 高 |
 | 使用量集計 | 実行使用量を集計し、運用判断に使えること | `node_hours.py` と `/results/usage` が実装済み | 予算主体、アカウント主体、runner 主体との結び付きがない | 多拠点運用と予算管理の核になる | 高 |
 | ソース出自情報 | 最上位アプリケーションの commit hash を追跡すること | `bk_fetch_source` と `source_info` が実装済み | すべての app で徹底されていない。 archive/file の場合は commit hash を持てない | 推定比較、AI 最適化、回帰分析の再現性に直結 | 高 |
@@ -183,7 +183,7 @@ Once the estimation specification is clarified, many other design decisions beco
 
 | 項目 | 仕様上の期待 | 現状実装 | GAP | 優先度 |
 |---|---|---|---|---|
-| 共通推定エントリ | app 側 `estimate.sh` を薄くし、共通呼び出し順を持つこと | `estimate_common.sh` と package 呼び出し型の `qws/estimate.sh` がある | 他 app への横展開が未着手 | 最優先 |
+| 共通推定エントリ | app 側 `estimate.sh` を薄くし、共通呼び出し順を持つこと | `scripts/estimation/common.sh` と package 呼び出し型の `qws/estimate.sh` がある | 他 app への横展開が未着手 | 最優先 |
 | 軽量推定 package | FOM-only、weak scaling 前提、補正なしなら FOM 一定 | `lightweight_fom_scaling` が実装済み | 参照実装は 1 本のみ。current/future 側で別 model を使う実運用は未整備 | 高 |
 | 適用可能性判定 | 不足入力を `applicable/fallback/not_applicable/needs_remeasurement` で扱うこと | `lightweight_fom_scaling` と `instrumented_app_sections_dummy` はこれらを扱え、final Estimate JSON では `applicable`、`partially_applicable`、`fallback`、`not_applicable` を表現できる。requested/applied package の識別も保持できる | 複数 detailed package 間の分岐、より細かい fallback 選択、UI 表示は未実装 | 高 |
 | package metadata | package 名、版、required inputs、fallback policy を持つこと | 軽量/詳細ダミーとも最小 metadata を持つ | richer metadata を discovery や UI に活かす実装がまだ無い | 中 |
@@ -198,7 +198,7 @@ Once the estimation specification is clarified, many other design decisions beco
 
 この表から、現在の最小核は以下と整理できる。
 
-1. `estimate_common.sh` を中心とした共通呼び出しと Estimate JSON 出力
+1. `scripts/estimation/common.sh` を中心とした共通呼び出しと Estimate JSON 出力
 2. `lightweight_fom_scaling` による FOM-only 軽量推定
 3. `instrumented_app_sections_dummy` による区間時間ベース詳細ダミー推定
 4. 推定元 result UUID / timestamp の引き回し
diff --git a/docs/cx/ESTIMATION_PACKAGE_SHELL_API_SPEC.md b/docs/cx/ESTIMATION_PACKAGE_SHELL_API_SPEC.md
@@ -28,9 +28,9 @@ This document is a lower-level specification under [`ESTIMATION_PACKAGE_SPEC.md`
 
 ## 2. 目的 / Purpose
 
-本 API の目的は、推定パッケージごとの差を吸収しつつ、`estimate_common.sh` や app 側 `estimate.sh` から一貫した呼び出しができるようにすることである。
+本 API の目的は、推定パッケージごとの差を吸収しつつ、`scripts/estimation/common.sh` や app 側 `estimate.sh` から一貫した呼び出しができるようにすることである。
 
-The purpose of this API is to allow consistent invocation from `estimate_common.sh` and application-side `estimate.sh` while absorbing differences among estimation packages.
+The purpose of this API is to allow consistent invocation from `scripts/estimation/common.sh` and application-side `estimate.sh` while absorbing differences among estimation packages.
 
 ## 3. 基本方針 / Basic Policy
 
@@ -182,7 +182,7 @@ In the future, application-side `estimate.sh` should preferably be limited to:
 
 ```sh
 BK_ESTIMATION_PACKAGE=lightweight_fom_scaling
-source scripts/estimate_common.sh
+source scripts/estimation/common.sh
 source scripts/estimation/packages/${BK_ESTIMATION_PACKAGE}.sh
 
 read_values "$1"
diff --git a/docs/cx/ESTIMATION_PACKAGE_SPEC.md b/docs/cx/ESTIMATION_PACKAGE_SPEC.md
@@ -539,7 +539,7 @@ In this form, the application side is responsible only for package selection and
 - package を shell で書くか外部ツールで書くか
 - package metadata の完全な構造
 
-これらは、現行の `estimate_common.sh` と app 側 `estimate.sh` の実装経験を踏まえて段階的に固定する。
+これらは、現行の `scripts/estimation/common.sh` と app 側 `estimate.sh` の実装経験を踏まえて段階的に固定する。
 
 This document does not yet fix:
 
@@ -549,7 +549,7 @@ This document does not yet fix:
 - whether packages are written in shell or external tools
 - the complete schema of package metadata
 
-These should be fixed incrementally based on implementation experience with the current `estimate_common.sh` and application-side `estimate.sh`.
+These should be fixed incrementally based on implementation experience with the current `scripts/estimation/common.sh` and application-side `estimate.sh`.
 
 ## 9. 次に必要な下位仕様 / Next Detailed Specifications
 
diff --git a/docs/cx/REESTIMATION_SPEC.md b/docs/cx/REESTIMATION_SPEC.md
@@ -148,17 +148,17 @@ Optionally, the following may also be supplied:
 
 現行実装では、以下の要素が存在する。
 
-- `run_estimate.sh` が app ごとの `estimate.sh` を呼び出す
-- `send_estimate.sh` が推定結果を結果サーバに送信する
+- `scripts/estimation/run.sh` が app ごとの `estimate.sh` を呼び出す
+- `scripts/result_server/send_estimate.sh` が推定結果を結果サーバに送信する
 - Estimate JSON には推定元 benchmark result の UUID を `estimate_metadata.source_result_uuid` として保持できる
 - Estimate JSON には保存対象としての推定結果自体の UUID / timestamp を `estimate_metadata.estimation_result_uuid` / `estimation_result_timestamp` として保持できる
 - `requested_estimation_package` と実際に適用された `estimation_package` を区別できる
 - `applicability` を通じて `applicable`、`partially_applicable`、`fallback`、`not_applicable` を最終状態として保持できる
 
 In the current implementation, the following already exist:
 
-- `run_estimate.sh` to invoke app-specific `estimate.sh`
-- `send_estimate.sh` to send estimation results to the result server
+- `scripts/estimation/run.sh` to invoke app-specific `estimate.sh`
+- `scripts/result_server/send_estimate.sh` to send estimation results to the result server
 - the ability to retain the source benchmark-result UUID as `estimate_metadata.source_result_uuid`
 - the ability to retain the estimate-result UUID / timestamp as `estimate_metadata.estimation_result_uuid` / `estimation_result_timestamp`
 - a distinction between `requested_estimation_package` and the actually applied `estimation_package`
diff --git a/docs/guides/add-estimation-to-app.md b/docs/guides/add-estimation-to-app.md
@@ -45,7 +45,7 @@
 set -euo pipefail
 
 BK_ESTIMATION_PACKAGE="lightweight_fom_scaling"
-source scripts/estimate_common.sh
+source scripts/estimation/common.sh
 source "scripts/estimation/packages/${BK_ESTIMATION_PACKAGE}.sh"
 
 bk_run_estimation "$1"
@@ -150,7 +150,7 @@ bk_emit_overlap \
 set -euo pipefail
 
 BK_ESTIMATION_PACKAGE="instrumented_app_sections_dummy"
-source scripts/estimate_common.sh
+source scripts/estimation/common.sh
 source "scripts/estimation/packages/${BK_ESTIMATION_PACKAGE}.sh"
 
 bk_run_estimation "$1"
diff --git a/programs/qws/estimate.sh b/programs/qws/estimate.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 # estimate.sh — Reference package-based estimation entrypoint for qws
 
-source scripts/estimate_common.sh
+source scripts/estimation/common.sh
 
 BK_ESTIMATION_PACKAGE="${BK_ESTIMATION_PACKAGE:-instrumented_app_sections_dummy}"
 BK_ESTIMATION_BASELINE_SYSTEM="Fugaku"
diff --git a/scripts/estimation/common.sh b/scripts/estimation/common.sh
@@ -1,15 +1,15 @@
 #!/bin/bash
-# estimate_common.sh — Common function library for performance estimation
+# common.sh — Common function library for performance estimation
 #
 # Provides shared variables and functions used by application-specific
 # estimate scripts (programs/<code>/estimate.sh).
 #
 # Usage:
-#   source scripts/estimate_common.sh
+#   source scripts/estimation/common.sh
 
 set -euo pipefail
 
-source "$(dirname "${BASH_SOURCE[0]}")/result_server_client.sh"
+source "$(dirname "${BASH_SOURCE[0]}")/../result_server/api.sh"
 
 # ---------------------------------------------------------------------------
 # Global variables — populated by read_values
diff --git a/scripts/estimation/generate_reestimate_pipeline.sh b/scripts/estimation/generate_reestimate_pipeline.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# generate_estimate_from_uuid.sh — UUID-based estimation pipeline YAML generator
+# generate_reestimate_pipeline.sh — UUID-based estimation pipeline YAML generator
 #
 # Generates a child pipeline YAML (.gitlab-ci.estimate.yml) for re-estimation
 # of a specific benchmark result identified by UUID.
@@ -40,7 +40,7 @@ fetch_result:
   tags: [fncx-curl-jq]
   script:
     - echo "Fetching re-estimation input"
-    - bash scripts/fetch_result_by_uuid.sh
+    - bash scripts/result_server/fetch_result_by_uuid.sh
   artifacts:
     paths:
       - results/
@@ -52,7 +52,7 @@ estimate_${code}:
   tags: ["general"]
   script:
     - echo "Running estimation for ${code}"
-    - bash scripts/run_estimate.sh ${code}
+    - bash scripts/estimation/run.sh ${code}
   artifacts:
     paths:
       - results/
@@ -65,7 +65,7 @@ send_estimate_${code}:
   environment:
     name: \$CI_COMMIT_BRANCH
   script:
-    - bash scripts/send_estimate.sh
+    - bash scripts/result_server/send_estimate.sh
 YAML
 
 echo "Generated $OUTPUT_FILE"
diff --git a/scripts/estimation/run.sh b/scripts/estimation/run.sh
@@ -1,8 +1,8 @@
 #!/bin/bash
-# run_estimate.sh — Estimation execution wrapper
+# run.sh — Estimation execution wrapper
 #
 # Called from CI job script section:
-#   bash scripts/run_estimate.sh <code>
+#   bash scripts/estimation/run.sh <code>
 #
 # Discovers result*.json files in results/ and runs the corresponding
 # application-specific estimate script for each one.
diff --git a/scripts/estimation/test_reestimate.sh b/scripts/estimation/test_reestimate.sh
@@ -62,11 +62,11 @@ echo "  uuid: $input_uuid"
 echo ""
 
 echo "[1/2] Fetching source result"
-bash scripts/fetch_result_by_uuid.sh
+bash scripts/result_server/fetch_result_by_uuid.sh
 
 echo ""
 echo "[2/2] Running estimation"
-bash scripts/run_estimate.sh "$code"
+bash scripts/estimation/run.sh "$code"
 
 echo ""
 echo "Generated files:"
diff --git a/scripts/job_functions.sh b/scripts/job_functions.sh
@@ -191,7 +191,7 @@ ${job_prefix}_send_results:
   script:
     - bash scripts/collect_timing.sh
     - bash scripts/result.sh ${program} ${system} ${mode} ${build_job} ${run_job} \$CI_PIPELINE_ID
-    - bash scripts/send_results.sh
+    - bash scripts/result_server/send_results.sh
   artifacts:
     paths:
       - results/
@@ -270,7 +270,7 @@ ${job_prefix}_estimate:
     name: \$CI_COMMIT_BRANCH
   script:
     - echo \"Running estimation for ${code}\"
-    - bash scripts/run_estimate.sh ${code}
+    - bash scripts/estimation/run.sh ${code}
   artifacts:
     paths:
       - results/
@@ -296,7 +296,7 @@ ${job_prefix}_send_estimate:
   environment:
     name: \$CI_COMMIT_BRANCH
   script:
-    - bash scripts/send_estimate.sh
+    - bash scripts/result_server/send_estimate.sh
 
 " >> "$output"
 }
diff --git a/scripts/result_server/api.sh b/scripts/result_server/api.sh
diff --git a/scripts/result_server/fetch_result_by_uuid.sh b/scripts/result_server/fetch_result_by_uuid.sh
@@ -10,7 +10,7 @@
 #   RESULT_SERVER  - Base URL of the result server
 set -euo pipefail
 
-source "$(dirname "$0")/result_server_client.sh"
+source "$(dirname "$0")/api.sh"
 
 if [[ -z "${code:-}" ]]; then
   echo "ERROR: code must be specified" >&2
diff --git a/scripts/result_server/send_estimate.sh b/scripts/result_server/send_estimate.sh
diff --git a/scripts/result_server/send_results.sh b/scripts/result_server/send_results.sh