forked from civilian7/sql-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcompile_exercises.py
More file actions
417 lines (350 loc) · 15.3 KB
/
Copy pathcompile_exercises.py
File metadata and controls
417 lines (350 loc) · 15.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
#!/usr/bin/env python3
"""Compile exercise YAML files into mkdocs markdown and exercise.db.
Usage:
# Compile all exercises
python compile_exercises.py
# Compile and generate expected results from tutorial DB
python compile_exercises.py --tutorial-db output/ecommerce.db
# Validate only (no output)
python compile_exercises.py --validate-only
# Compile single file
python compile_exercises.py --file exercises/beginner/01-select.yaml
"""
import argparse
import hashlib
import json
import os
import sqlite3
import sys
from pathlib import Path
import yaml
# Root directory searched recursively for exercise ``*.yaml`` files.
EXERCISES_DIR = Path("exercises")
# Destination directories for the generated mkdocs pages (Korean / English).
DOCS_KO_DIR = Path("docs") / "ko" / "exercises"
DOCS_EN_DIR = Path("docs") / "en" / "exercises"
# Default location of the compiled exercise database (``--output-db`` default).
OUTPUT_DB = Path("output") / "exercise.db"
def load_yaml(path: Path) -> dict:
    """Parse one exercise YAML file and return its top-level mapping.

    Args:
        path: Location of the YAML file; it is read as UTF-8.

    Returns:
        The parsed top-level mapping.

    Raises:
        ValueError: If the document is empty or its top level is not a
            mapping. ``yaml.safe_load`` returns ``None`` for an empty
            document, which would otherwise surface later as an opaque
            ``AttributeError`` when callers invoke ``.get`` on the result.
    """
    with open(path, encoding="utf-8") as f:
        data = yaml.safe_load(f)
    if not isinstance(data, dict):
        raise ValueError(
            f"{path}: expected a YAML mapping at the top level, "
            f"got {type(data).__name__}"
        )
    return data
def create_exercise_db(db_path: Path):
    """Build a fresh, empty exercise database and return its open connection.

    The parent directory is created when missing and any existing file at
    ``db_path`` is deleted first, so the returned connection always points
    at a database containing only the schema below. The caller owns the
    connection and is responsible for committing and closing it.
    """
    db_path.parent.mkdir(parents=True, exist_ok=True)
    if db_path.exists():
        db_path.unlink()

    # Schema text is kept verbatim; SQLite stores it in sqlite_master.
    schema_sql = """
CREATE TABLE exercise_sets (
id TEXT PRIMARY KEY,
title TEXT NOT NULL,
title_en TEXT,
difficulty TEXT NOT NULL,
concepts TEXT NOT NULL,
prerequisites TEXT,
estimated_minutes INTEGER,
sort_order INTEGER NOT NULL,
created_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE problems (
id TEXT PRIMARY KEY,
exercise_id TEXT NOT NULL REFERENCES exercise_sets(id),
question TEXT NOT NULL,
question_en TEXT,
level INTEGER DEFAULT 3,
type TEXT DEFAULT 'SELECT',
reference_sql_common TEXT,
reference_sql_sqlite TEXT,
reference_sql_mysql TEXT,
reference_sql_postgresql TEXT,
supported_db TEXT NOT NULL DEFAULT '["sqlite","mysql","postgresql"]',
validation_json TEXT NOT NULL,
hints_json TEXT,
rubric TEXT,
rubric_en TEXT,
max_score INTEGER DEFAULT 10,
tags_json TEXT,
sort_order INTEGER NOT NULL,
expected_columns TEXT,
expected_row_count INTEGER,
expected_hash TEXT
);
CREATE TABLE exercise_tags (
tag TEXT PRIMARY KEY,
category TEXT NOT NULL
);
CREATE TABLE problem_tags (
problem_id TEXT NOT NULL REFERENCES problems(id),
tag TEXT NOT NULL,
PRIMARY KEY (problem_id, tag)
);
CREATE TABLE attempts (
id INTEGER PRIMARY KEY AUTOINCREMENT,
problem_id TEXT NOT NULL REFERENCES problems(id),
user_sql TEXT NOT NULL,
syntax_valid INTEGER NOT NULL,
columns_match INTEGER NOT NULL,
row_count_match INTEGER NOT NULL,
data_match INTEGER NOT NULL,
result_hash TEXT,
det_score INTEGER NOT NULL,
ai_score INTEGER,
ai_feedback TEXT,
total_score INTEGER NOT NULL,
execution_ms INTEGER,
row_count INTEGER,
attempted_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE progress (
problem_id TEXT PRIMARY KEY REFERENCES problems(id),
best_score INTEGER NOT NULL DEFAULT 0,
attempt_count INTEGER NOT NULL DEFAULT 0,
completed INTEGER NOT NULL DEFAULT 0,
first_solved_at TEXT,
last_attempt_at TEXT NOT NULL DEFAULT (datetime('now'))
);
CREATE TABLE badges (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
name_en TEXT,
description TEXT NOT NULL,
description_en TEXT,
icon TEXT,
condition_sql TEXT NOT NULL,
earned_at TEXT
);
CREATE INDEX idx_problems_exercise_id ON problems(exercise_id);
CREATE INDEX idx_attempts_problem_id ON attempts(problem_id);
"""

    connection = sqlite3.connect(str(db_path))
    connection.executescript(schema_sql)
    return connection
def compute_expected(conn_tutorial, sql: str) -> tuple:
    """Run a reference query and summarise its result set.

    Returns a 3-tuple ``(columns_json, row_count, result_hash)`` where
    ``columns_json`` is the JSON-encoded list of column names and
    ``result_hash`` is the SHA-256 hex digest of the stringified rows after
    sorting (so the digest does not depend on row order). If anything goes
    wrong while running the query, a warning is printed and
    ``(None, None, None)`` is returned instead of raising.
    """
    try:
        cur = conn_tutorial.execute(sql)
        description = cur.description
        column_names = []
        if description:
            column_names = [col[0] for col in description]
        fetched = cur.fetchall()
        # Canonical form: one str(row) per line, lexicographically sorted.
        canonical = [str(row) for row in fetched]
        canonical.sort()
        payload = "\n".join(canonical).encode()
        digest = hashlib.sha256(payload).hexdigest()
        return json.dumps(column_names), len(fetched), digest
    except Exception as e:
        print(f" WARNING: SQL execution failed: {e}")
        return None, None, None
def _split_reference_sql(prob: dict) -> tuple:
    """Return ``(common, sqlite, mysql, postgresql)`` reference SQL for a problem."""
    ref_sql = prob.get("reference_sql")
    if ref_sql is None:
        # Covers both a missing key and an explicit ``reference_sql:`` null.
        ref_sql = {}
    if isinstance(ref_sql, str):
        return ref_sql, None, None, None
    if not isinstance(ref_sql, dict):
        raise TypeError(
            f"problem {prob.get('id')!r}: reference_sql must be a string or a "
            f"mapping, got {type(ref_sql).__name__}"
        )
    return (
        ref_sql.get("common") or ref_sql.get("all"),
        ref_sql.get("sqlite"),
        ref_sql.get("mysql"),
        ref_sql.get("postgresql"),
    )


def _collect_hints(prob: dict) -> list:
    """Return the problem's hints as a list (``hints`` array or single ``hint``)."""
    hints = prob.get("hints") or []
    if isinstance(hints, (str, dict)):
        # A lone scalar/mapping is one hint, not a sequence to iterate over.
        hints = [hints]
    if not hints:
        hint_ko = prob.get("hint", "")
        hint_en = prob.get("hint_en", "")
        if hint_ko:
            hints = [{"ko": hint_ko, "en": hint_en or hint_ko}]
    return list(hints)


def _answer_variants(ref_common, ref_sqlite, ref_mysql, ref_pg) -> list:
    """Return ``[(tab_label, sql), ...]`` per dialect, falling back to the common SQL."""
    candidates = (
        ("SQLite", ref_sqlite),
        ("MySQL", ref_mysql),
        ("PostgreSQL", ref_pg),
    )
    return [
        (label, specific or ref_common)
        for label, specific in candidates
        if specific or ref_common
    ]


def _render_answer_md(summary: str, variants: list) -> list:
    """Render the collapsible answer block as a list of markdown chunks.

    Content nested under ``???`` must be indented four spaces, and content
    nested under a ``===`` tab inside it eight spaces, otherwise the code
    fence is rendered outside the collapsible block.
    """
    distinct_sql = {sql.strip() for _, sql in variants}
    if len(distinct_sql) > 1:
        # Dialects really differ: one tab per dialect that has an answer.
        chunks = [f'\n??? success "{summary}"\n']
        for label, sql in variants:
            body = _indent(sql, prefix=" " * 8)
            chunks.append(
                f'    === "{label}"\n        ```sql\n        {body}\n        ```\n'
            )
        return chunks
    # Zero or one distinct statement: a single plain code block.
    answer_sql = variants[0][1] if variants else ""
    body = _indent(answer_sql, prefix=" " * 4)
    return [f'\n??? success "{summary}"\n    ```sql\n    {body}\n    ```\n']


def compile_yaml_file(yaml_path: Path, conn_db, conn_tutorial, sort_base: int) -> dict:
    """Compile a single YAML file into exercise.db rows and mkdocs markdown.

    Args:
        yaml_path: Exercise YAML file with ``metadata`` and ``problems`` keys.
        conn_db: Open connection to the exercise database; one
            ``exercise_sets`` row, one ``problems`` row per problem and the
            ``problem_tags`` mappings are inserted. Nothing is committed here.
        conn_tutorial: Optional connection used to execute the reference SQL
            (SQLite-specific variant first, then the common one) to obtain
            expected columns / row count / hash. Skipped when falsy.
        sort_base: Sort order of the exercise set; problems are numbered
            ``sort_base * 100 + position``.

    Returns:
        A dict with ``exercise_id``, the Korean and English markdown
        (``md_ko`` / ``md_en``) and ``problem_count``.

    Raises:
        KeyError: If ``metadata.id`` or a problem ``id`` is missing.
        TypeError: If a problem's ``reference_sql`` is neither a string nor
            a mapping.
    """
    data = load_yaml(yaml_path) or {}
    # ``key:`` with no value parses as None, so ``or`` guards both cases.
    meta = data.get("metadata") or {}
    problems = data.get("problems") or []
    exercise_id = meta["id"]
    print(f" [{exercise_id}] {meta.get('title', '')} ({len(problems)} problems)")

    # Insert exercise set
    conn_db.execute(
        "INSERT INTO exercise_sets (id, title, title_en, difficulty, concepts, prerequisites, estimated_minutes, sort_order) VALUES (?,?,?,?,?,?,?,?)",
        (
            exercise_id,
            meta.get("title", ""),
            meta.get("title_en", ""),
            meta.get("difficulty", "beginner"),
            json.dumps(meta.get("concepts", []), ensure_ascii=False),
            json.dumps(meta.get("prerequisites", []), ensure_ascii=False),
            meta.get("estimated_minutes"),
            sort_base,
        ),
    )

    # Build mkdocs markdown (ko + en); an empty title falls back to the id.
    md_ko_lines = [f"# {meta.get('title') or exercise_id}\n"]
    md_en_lines = [f"# {meta.get('title_en') or exercise_id}\n"]
    desc_ko = meta.get("description", "")
    desc_en = meta.get("description_en", "")
    if desc_ko:
        md_ko_lines.append(f"{desc_ko}\n\n---\n")
    if desc_en:
        md_en_lines.append(f"{desc_en}\n\n---\n")

    for num, prob in enumerate(problems, start=1):
        pid = prob["id"]
        sort_order = sort_base * 100 + num

        ref_common, ref_sqlite, ref_mysql, ref_pg = _split_reference_sql(prob)
        supported = prob.get("supported_db", ["sqlite", "mysql", "postgresql"])

        # Expected results come from the SQLite flavour of the reference SQL.
        exec_sql = ref_sqlite or ref_common
        exp_cols, exp_rows, exp_hash = None, None, None
        if conn_tutorial and exec_sql:
            exp_cols, exp_rows, exp_hash = compute_expected(conn_tutorial, exec_sql.strip())

        hints = _collect_hints(prob)
        hints_json = json.dumps(hints, ensure_ascii=False) if hints else None

        validation = prob.get("validation", {"type": "result_match"})

        # Problem-level settings override the set-level metadata defaults.
        prob_level = prob.get("level", meta.get("level", 3))
        prob_type = prob.get("type", meta.get("type", "SELECT"))
        prob_tags = prob.get("tags") or []

        # Support both "question" and "body"; resolved once for DB and markdown.
        question_ko = prob.get("question", "") or prob.get("body", "") or ""
        question_en = prob.get("question_en", "") or prob.get("body_en", "") or ""

        conn_db.execute(
            """INSERT INTO problems (id, exercise_id, question, question_en,
            level, type,
            reference_sql_common, reference_sql_sqlite, reference_sql_mysql, reference_sql_postgresql,
            supported_db, validation_json, hints_json, rubric, rubric_en,
            max_score, tags_json, sort_order, expected_columns, expected_row_count, expected_hash)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (
                pid, exercise_id,
                question_ko,
                question_en,
                prob_level, prob_type,
                ref_common, ref_sqlite, ref_mysql, ref_pg,
                json.dumps(supported),
                json.dumps(validation, ensure_ascii=False),
                hints_json,
                _to_str(prob.get("rubric", "")),
                _to_str(prob.get("rubric_en", "")),
                prob.get("max_score", 10),
                json.dumps(prob_tags, ensure_ascii=False),
                sort_order,
                exp_cols, exp_rows, exp_hash,
            ),
        )

        # Insert problem-tag mappings
        for tag in prob_tags:
            conn_db.execute(
                "INSERT OR IGNORE INTO problem_tags (problem_id, tag) VALUES (?,?)",
                (pid, tag),
            )

        # Generate markdown; the English page falls back to the Korean text.
        body_ko = question_ko
        body_en = question_en or body_ko
        title_ko = prob.get("title", "")
        title_en = prob.get("title_en", title_ko)
        # Heading: use title if available, otherwise first line of body
        heading_ko = title_ko or body_ko.strip().split("\n")[0][:60]
        heading_en = title_en or body_en.strip().split("\n")[0][:60]
        md_ko_lines.append(f"\n### {num}. {heading_ko}\n")
        md_ko_lines.append(f"\n{body_ko.strip()}\n")
        md_en_lines.append(f"\n### {num}. {heading_en}\n")
        md_en_lines.append(f"\n{body_en.strip()}\n")

        # Hints: mappings carry per-language text, anything else is used as-is.
        for hint_no, hint in enumerate(hints, start=1):
            if isinstance(hint, dict):
                hint_ko = hint.get("ko", "")
                hint_en = hint.get("en", hint_ko)
            else:
                hint_ko = hint_en = str(hint)
            md_ko_lines.append(f"\n**힌트 {hint_no}:** {hint_ko}\n")
            md_en_lines.append(f"\n**Hint {hint_no}:** {hint_en}\n")

        # Answer (collapsible); identical structure for both languages.
        variants = _answer_variants(ref_common, ref_sqlite, ref_mysql, ref_pg)
        md_ko_lines.extend(_render_answer_md("정답", variants))
        md_en_lines.extend(_render_answer_md("Answer", variants))
        md_ko_lines.append("\n---\n")
        md_en_lines.append("\n---\n")

    return {
        "exercise_id": exercise_id,
        "md_ko": "\n".join(md_ko_lines),
        "md_en": "\n".join(md_en_lines),
        "problem_count": len(problems),
    }
def _to_str(val) -> str:
"""Convert value to string; dict/list become JSON."""
if val is None:
return ""
if isinstance(val, (dict, list)):
return json.dumps(val, ensure_ascii=False)
return str(val)
def _indent(sql: str, prefix: str = " ") -> str:
"""Indent multi-line SQL for markdown code blocks."""
lines = sql.strip().split("\n")
return f"\n{prefix}".join(lines)
def main():
    """Command-line entry point: validate or compile the exercise YAML files.

    With ``--validate-only`` every file is parsed and a one-line OK/ERR status
    is printed; nothing is written. Otherwise the output database is
    recreated, every file is compiled into it, and one Korean and one English
    markdown page per exercise set is written under the docs directories.

    Each file is committed on its own, and a file that fails is rolled back,
    so a partially processed file leaves no rows behind in exercise.db.
    """
    import traceback

    parser = argparse.ArgumentParser(description="Compile exercise YAML to mkdocs + exercise.db")
    parser.add_argument("--tutorial-db", type=str, default="output/ecommerce.db",
                        help="Tutorial DB for computing expected results")
    parser.add_argument("--output-db", type=str, default=str(OUTPUT_DB),
                        help="Output exercise.db path")
    parser.add_argument("--validate-only", action="store_true", help="Validate only, no output")
    parser.add_argument("--file", type=str, help="Compile a single YAML file")
    args = parser.parse_args()

    # Find YAML files
    if args.file:
        yaml_files = [Path(args.file)]
    else:
        yaml_files = sorted(EXERCISES_DIR.rglob("*.yaml"))
    if not yaml_files:
        print("No YAML exercise files found in exercises/")
        return
    print(f"Found {len(yaml_files)} exercise files")

    # Connect to tutorial DB for expected result computation. When the file
    # does not exist, expected results are simply not computed.
    conn_tutorial = None
    if os.path.exists(args.tutorial_db):
        conn_tutorial = sqlite3.connect(args.tutorial_db)
        print(f"Using tutorial DB: {args.tutorial_db}")

    try:
        if args.validate_only:
            # Just parse and validate
            for yf in yaml_files:
                try:
                    data = load_yaml(yf)
                    meta = data.get("metadata", {})
                    problems = data.get("problems", [])
                    print(f" OK {yf} -{meta.get('id', '?')}: {len(problems)} problems")
                except Exception as e:
                    print(f" ERR {yf} -{e}")
            return

        # Create exercise.db
        conn_db = create_exercise_db(Path(args.output_db))
        try:
            os.makedirs(DOCS_KO_DIR, exist_ok=True)
            os.makedirs(DOCS_EN_DIR, exist_ok=True)
            total_problems = 0
            compiled_files = 0
            failed_files = []
            for i, yf in enumerate(yaml_files):
                try:
                    result = compile_yaml_file(yf, conn_db, conn_tutorial, sort_base=i + 1)
                    # Write mkdocs markdown.
                    # NOTE: existing pages with the same name are overwritten
                    # unconditionally; hand-written files are not preserved.
                    md_filename = f"{result['exercise_id']}.md"
                    ko_path = DOCS_KO_DIR / md_filename
                    en_path = DOCS_EN_DIR / md_filename
                    ko_path.write_text(result["md_ko"], encoding="utf-8")
                    en_path.write_text(result["md_en"], encoding="utf-8")
                except Exception as e:
                    # Best effort: report, drop this file's rows, keep going.
                    conn_db.rollback()
                    failed_files.append(yf)
                    print(f" ERR {yf}: {e}")
                    traceback.print_exc()
                else:
                    conn_db.commit()
                    total_problems += result["problem_count"]
                    compiled_files += 1
        finally:
            conn_db.close()
    finally:
        # Also reached on the --validate-only return path.
        if conn_tutorial:
            conn_tutorial.close()

    print(f"\nCompiled {compiled_files} files, {total_problems} problems")
    if failed_files:
        print(f" failed: {len(failed_files)} files")
    print(f" exercise.db: {args.output_db}")
    print(f" mkdocs (ko): {DOCS_KO_DIR}/")
    print(f" mkdocs (en): {DOCS_EN_DIR}/")


if __name__ == "__main__":
    main()