Skip to content

Commit 0f756c3

Browse files
authored
Merge pull request #11 from poyrazK/fix/storage-and-tests-stability
Stability & Testing Refinement
2 parents 7b814db + 5cd90ae commit 0f756c3

17 files changed

Lines changed: 643 additions & 42 deletions

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ coverage/
8484
*.orig
8585
*.rej
8686

87+
# Storage Files
88+
# ==============
89+
*.heap
90+
8791
# ==============
8892
# Emacs
8993
# ==============

docs/phases/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,13 @@ This directory contains the technical documentation for the lifecycle of the clo
5454
- Batch-at-a-time vectorized execution model (Scan, Filter, Project, Aggregate).
5555
- High-performance `NumericVector` and `VectorBatch` data structures.
5656

57+
### Phase 9 — Stability & Testing Refinement
58+
**Focus**: Engine Robustness & E2E Validation.
59+
- Slotted-page layout fixes for large table support.
60+
- Buffer Pool Manager lifecycle management (destructor flushing).
61+
- Robust Python E2E client with partial-read handling and numeric validation.
62+
- Standardized test orchestration via `run_test.sh`.
63+
5764
---
5865

5966
## Technical Standards

include/parser/parser.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ class Parser {
2525

2626
std::unique_ptr<Statement> parse_select();
2727
std::unique_ptr<Statement> parse_create_table();
28+
std::unique_ptr<Statement> parse_create_index();
2829
std::unique_ptr<Statement> parse_insert();
2930
std::unique_ptr<Statement> parse_update();
3031
std::unique_ptr<Statement> parse_delete();

include/parser/statement.hpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,43 @@ class CreateTableStatement : public Statement {
239239
[[nodiscard]] std::string to_string() const override;
240240
};
241241

242+
/**
243+
* @brief CREATE INDEX statement
244+
*/
245+
class CreateIndexStatement : public Statement {
246+
private:
247+
std::string index_name_;
248+
std::string table_name_;
249+
std::vector<std::string> columns_;
250+
bool unique_ = false;
251+
252+
public:
253+
CreateIndexStatement() = default;
254+
255+
[[nodiscard]] StmtType type() const override { return StmtType::CreateIndex; }
256+
257+
void set_index_name(std::string name) { index_name_ = std::move(name); }
258+
void set_table_name(std::string name) { table_name_ = std::move(name); }
259+
void add_column(std::string col) { columns_.push_back(std::move(col)); }
260+
void set_unique(bool unique) { unique_ = unique; }
261+
262+
[[nodiscard]] const std::string& index_name() const { return index_name_; }
263+
[[nodiscard]] const std::string& table_name() const { return table_name_; }
264+
[[nodiscard]] const std::vector<std::string>& columns() const { return columns_; }
265+
[[nodiscard]] bool unique() const { return unique_; }
266+
267+
[[nodiscard]] std::string to_string() const override {
268+
std::string s = "CREATE ";
269+
if (unique_) s += "UNIQUE ";
270+
s += "INDEX " + index_name_ + " ON " + table_name_ + " (";
271+
for (size_t i = 0; i < columns_.size(); ++i) {
272+
s += columns_[i] + (i == columns_.size() - 1 ? "" : ", ");
273+
}
274+
s += ")";
275+
return s;
276+
}
277+
};
278+
242279
/**
243280
* @brief DROP TABLE statement
244281
*/

src/executor/query_executor.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,12 @@ std::unique_ptr<Operator> QueryExecutor::build_plan(const parser::SelectStatemen
716716
}
717717
current_root = std::make_unique<AggregateOperator>(std::move(current_root),
718718
std::move(group_by), std::move(aggs));
719+
720+
/* 3.5. Having */
721+
if (stmt.having()) {
722+
current_root =
723+
std::make_unique<FilterOperator>(std::move(current_root), stmt.having()->clone());
724+
}
719725
}
720726

721727
/* 4. Sort (ORDER BY) */

src/parser/lexer.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ std::map<std::string, TokenType> Lexer::init_keywords() {
8181
{"CHAR", TokenType::TypeChar},
8282
{"BOOL", TokenType::TypeBool},
8383
{"BOOLEAN", TokenType::TypeBool},
84-
{"DISTINCT", TokenType::Distinct}};
84+
{"DISTINCT", TokenType::Distinct},
85+
{"HAVING", TokenType::Having}};
8586
}
8687

8788
Token Lexer::next_token() {

src/parser/parser.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ std::unique_ptr<Statement> Parser::parse_statement() {
4444
static_cast<void>(next_token()); // consume CREATE
4545
if (peek_token().type() == TokenType::Table) {
4646
stmt = parse_create_table();
47+
} else if (peek_token().type() == TokenType::Index ||
48+
peek_token().type() == TokenType::Unique) {
49+
stmt = parse_create_index();
4750
}
4851
break;
4952
case TokenType::Insert:
@@ -341,6 +344,62 @@ std::unique_ptr<Statement> Parser::parse_create_table() {
341344
return stmt;
342345
}
343346

347+
/**
348+
* @brief Parse CREATE INDEX statement
349+
*/
350+
std::unique_ptr<Statement> Parser::parse_create_index() {
351+
auto stmt = std::make_unique<CreateIndexStatement>();
352+
if (consume(TokenType::Unique)) {
353+
stmt->set_unique(true);
354+
}
355+
if (!consume(TokenType::Index)) {
356+
return nullptr;
357+
}
358+
359+
const Token name = next_token();
360+
if (name.type() != TokenType::Identifier) {
361+
return nullptr;
362+
}
363+
stmt->set_index_name(name.lexeme());
364+
365+
if (!consume(TokenType::On)) {
366+
return nullptr;
367+
}
368+
369+
const Token table_name = next_token();
370+
if (table_name.type() != TokenType::Identifier) {
371+
return nullptr;
372+
}
373+
stmt->set_table_name(table_name.lexeme());
374+
375+
if (!consume(TokenType::LParen)) {
376+
return nullptr;
377+
}
378+
379+
bool first = true;
380+
while (true) {
381+
if (!first && !consume(TokenType::Comma)) {
382+
break;
383+
}
384+
first = false;
385+
386+
const Token col_name = next_token();
387+
if (col_name.type() != TokenType::Identifier) {
388+
return nullptr;
389+
}
390+
stmt->add_column(col_name.lexeme());
391+
392+
if (peek_token().type() == TokenType::RParen) {
393+
break;
394+
}
395+
}
396+
397+
if (!consume(TokenType::RParen)) {
398+
return nullptr;
399+
}
400+
return stmt;
401+
}
402+
344403
/**
345404
* @brief Parse INSERT statement
346405
*/

src/storage/buffer_pool_manager.cpp

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include <cstdint>
99
#include <cstring>
10+
#include <iostream>
1011
#include <memory>
1112
#include <mutex>
1213
#include <string>
@@ -28,7 +29,19 @@ BufferPoolManager::BufferPoolManager(size_t pool_size, StorageManager& storage_m
2829
}
2930
}
3031

31-
BufferPoolManager::~BufferPoolManager() = default;
32+
/**
 * @brief Flush every page before the pool goes away.
 *
 * Calls flush_all_pages() and reports any exception on stderr instead of
 * letting it escape, since an exception leaving a destructor would end in
 * std::terminate.
 */
BufferPoolManager::~BufferPoolManager() {
    try {
        flush_all_pages();
    } catch (const std::exception& flush_error) {
        /* Known failure: include the exception's own description in the report. */
        std::cerr << "[Error] Exception in BufferPoolManager destructor during flush_all_pages: ";
        std::cerr << flush_error.what() << std::endl;
    } catch (...) {
        /* Anything not derived from std::exception: report without details. */
        std::cerr << "[Error] Unknown exception in BufferPoolManager destructor during "
                     "flush_all_pages"
                  << std::endl;
    }
}
3245

3346
Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_id) {
3447
const std::scoped_lock<std::mutex> lock(latch_);
@@ -62,7 +75,11 @@ Page* BufferPoolManager::fetch_page(const std::string& file_name, uint32_t page_
6275
page->file_name_ = file_name;
6376
page->pin_count_ = 1;
6477
page->is_dirty_ = false;
65-
storage_manager_.read_page(file_name, page_id, page->get_data());
78+
79+
if (!storage_manager_.read_page(file_name, page_id, page->get_data())) {
80+
// If read fails (e.g. file too short), initialize with zeros
81+
std::memset(page->get_data(), 0, Page::PAGE_SIZE);
82+
}
6683

6784
replacer_.pin(frame_id);
6885
return page;

src/storage/heap_table.cpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,10 @@ HeapTable::TupleId HeapTable::insert(const executor::Tuple& tuple, uint64_t xmin
135135
}
136136

137137
const auto required = static_cast<uint16_t>(data_str.size() + 1);
138-
const auto slot_array_end =
139-
static_cast<uint16_t>(sizeof(PageHeader) + ((header.num_slots + 1) * sizeof(uint16_t)));
140138

141139
/* Check for sufficient free space in the current page */
142140
if (header.free_space_offset + required < Page::PAGE_SIZE &&
143-
slot_array_end < header.free_space_offset) {
141+
header.num_slots < DEFAULT_SLOT_COUNT) {
144142
const uint16_t offset = header.free_space_offset;
145143
std::memcpy(std::next(buffer.data(), static_cast<std::ptrdiff_t>(offset)),
146144
data_str.c_str(), data_str.size() + 1);

tests/analytics_tests.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,4 +219,40 @@ TEST(AnalyticsTests, AggregateNullHandling) {
219219
EXPECT_TRUE(result_batch->get_column(1).is_null(0));
220220
}
221221

222+
/*
 * Evaluates the predicate `(a IS NULL) OR (a > 20)` in vectorized form over a
 * three-row batch that contains NULLs, and checks the boolean result per row.
 */
TEST(AnalyticsTests, VectorizedExpressionAdvanced) {
    StorageManager storage("./test_analytics");

    /* Two INT64 columns, "a" and "b". */
    Schema schema;
    schema.add_column("a", common::ValueType::TYPE_INT64, true);
    schema.add_column("b", common::ValueType::TYPE_INT64, true);

    /* Input rows: (10, 20), (NULL, 30), (40, NULL). */
    auto rows = VectorBatch::create(schema);
    rows->append_tuple(Tuple({common::Value::make_int64(10), common::Value::make_int64(20)}));
    rows->append_tuple(Tuple({common::Value::make_null(), common::Value::make_int64(30)}));
    rows->append_tuple(Tuple({common::Value::make_int64(40), common::Value::make_null()}));

    /* Left operand: a IS NULL. */
    auto null_check_column = std::make_unique<ColumnExpr>("a");
    auto a_is_null = std::make_unique<IsNullExpr>(std::move(null_check_column), false);

    /* Right operand: a > 20. */
    auto compare_column = std::make_unique<ColumnExpr>("a");
    auto threshold = std::make_unique<ConstantExpr>(common::Value::make_int64(20));
    auto a_gt_20 = std::make_unique<BinaryExpr>(std::move(compare_column), TokenType::Gt,
                                                std::move(threshold));

    BinaryExpr predicate(std::move(a_is_null), TokenType::Or, std::move(a_gt_20));

    NumericVector<bool> outcome(common::ValueType::TYPE_BOOL);
    predicate.evaluate_vectorized(*rows, schema, outcome);

    ASSERT_EQ(outcome.size(), 3U);
    /* Row 0: (10 IS NULL) OR (10 > 20) -> FALSE OR FALSE -> FALSE */
    EXPECT_FALSE(outcome.get(0).as_bool());
    /* Row 1: (NULL IS NULL) OR (NULL > 20) -> TRUE OR NULL -> TRUE */
    EXPECT_TRUE(outcome.get(1).as_bool());
    /* Row 2: (40 IS NULL) OR (40 > 20) -> FALSE OR TRUE -> TRUE */
    EXPECT_TRUE(outcome.get(2).as_bool());
}
257+
222258
} // namespace

0 commit comments

Comments
 (0)