Skip to content

Commit 33a4e42

Browse files
authored
Merge pull request #14 from poyrazK/feat/logic-testing-and-integrity
feat: advanced join semantics, transactional integrity, and expanded SLT suite
2 parents e8f2c69 + fec18cb commit 33a4e42

22 files changed

Lines changed: 732 additions & 122 deletions

README.md

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
1515
- **Analytics Performance**:
1616
- **Columnar Storage**: Binary-per-column persistence for efficient analytical scanning.
1717
- **Vectorized Execution**: Batch-at-a-time processing model for high-throughput query execution.
18-
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC).
18+
- **Multi-Node Transactions**: ACID guarantees across the cluster via Two-Phase Commit (2PC) and connection-aware execution state supporting `BEGIN`, `COMMIT`, and `ROLLBACK`.
19+
- **Advanced Execution Engine**:
20+
- **Full Outer Join Support**: Specialized `HashJoinOperator` implementing `LEFT`, `RIGHT`, and `FULL` outer join semantics with automatic null-padding.
21+
- **B+ Tree Indexing**: Persistent indexing for high-speed point lookups and optimized query planning.
1922
- **Type-Safe Value System**: Robust handling of SQL data types using `std::variant`.
2023
- **Volcano & Vectorized Engine**: Flexible execution models supporting traditional row-based and high-performance columnar processing.
2124
- **PostgreSQL Wire Protocol**: Handshake and simple query protocol implementation for tool compatibility.
@@ -46,17 +49,18 @@ A lightweight, distributed SQL database engine. Designed for cloud environments
4649
mkdir build
4750
cd build
4851
cmake ..
49-
make -j$(nproc)
52+
make -j$(nproc) # Or ../tests/run_test.sh for automated multi-OS build
5053
```
5154

5255
### Running Tests
5356

5457
```bash
55-
# Run all tests
58+
# Run the integrated test suite (Unit + E2E + Logic)
59+
./tests/run_test.sh
60+
61+
# Or run individual binaries
5662
./build/sqlEngine_tests
57-
# Run distributed-specific tests
5863
./build/distributed_tests
59-
./build/distributed_txn_tests
6064
```
6165

6266
### Starting the Cluster

docs/phases/README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,10 +56,10 @@ This directory contains the technical documentation for the lifecycle of the clo
5656

5757
### Phase 9 — Stability & Testing Refinement
5858
**Focus**: Engine Robustness & E2E Validation.
59-
- Slotted-page layout fixes for large table support.
60-
- Buffer Pool Manager lifecycle management (destructor flushing).
61-
- Robust Python E2E client with partial-read handling and numeric validation.
62-
- Standardized test orchestration via `run_test.sh`.
59+
- **Advanced Execution**: Full support for `LEFT`, `RIGHT`, and `FULL` outer joins.
60+
- **Transactional Integrity**: Persistent connection-based execution state and comprehensive `ROLLBACK` support for all DML operations.
61+
- **Logic Validation**: Integration of the SqlLogicTest (SLT) suite with 80+ logic test cases covering Joins, Transactions, Aggregates, and Indexes.
62+
- **Automation**: Standardized cross-platform test orchestration via `run_test.sh` with automatic CPU detection.
6363

6464
---
6565

include/executor/operator.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,12 +326,12 @@ class HashJoinOperator : public Operator {
326326
class LimitOperator : public Operator {
327327
private:
328328
std::unique_ptr<Operator> child_;
329-
uint64_t limit_;
330-
uint64_t offset_;
329+
int64_t limit_;
330+
int64_t offset_;
331331
uint64_t current_count_ = 0;
332332

333333
public:
334-
LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset = 0);
334+
LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset = 0);
335335

336336
bool init() override;
337337
bool open() override;

include/executor/query_executor.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class QueryExecutor {
4444
transaction::TransactionManager& transaction_manager,
4545
recovery::LogManager* log_manager = nullptr,
4646
cluster::ClusterManager* cluster_manager = nullptr);
47-
~QueryExecutor() = default;
47+
~QueryExecutor();
4848

4949
// Disable copy/move for executor
5050
QueryExecutor(const QueryExecutor&) = delete;
@@ -74,6 +74,7 @@ class QueryExecutor {
7474

7575
QueryResult execute_select(const parser::SelectStatement& stmt, transaction::Transaction* txn);
7676
QueryResult execute_create_table(const parser::CreateTableStatement& stmt);
77+
QueryResult execute_create_index(const parser::CreateIndexStatement& stmt);
7778
QueryResult execute_drop_table(const parser::DropTableStatement& stmt);
7879
QueryResult execute_drop_index(const parser::DropIndexStatement& stmt);
7980
QueryResult execute_insert(const parser::InsertStatement& stmt, transaction::Transaction* txn);

include/parser/statement.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,8 +73,8 @@ class SelectStatement : public Statement {
7373
std::vector<std::unique_ptr<Expression>> group_by_;
7474
std::unique_ptr<Expression> having_;
7575
std::vector<std::unique_ptr<Expression>> order_by_;
76-
int64_t limit_ = 0;
77-
int64_t offset_ = 0;
76+
int64_t limit_ = -1;
77+
int64_t offset_ = -1;
7878
bool distinct_ = false;
7979

8080
public:
@@ -112,7 +112,7 @@ class SelectStatement : public Statement {
112112
[[nodiscard]] int64_t limit() const { return limit_; }
113113
[[nodiscard]] int64_t offset() const { return offset_; }
114114
[[nodiscard]] bool distinct() const { return distinct_; }
115-
[[nodiscard]] bool has_limit() const { return limit_ > 0; }
115+
[[nodiscard]] bool has_limit() const { return limit_ >= 0; }
116116
[[nodiscard]] bool has_offset() const { return offset_ > 0; }
117117

118118
[[nodiscard]] std::string to_string() const override;

include/parser/token.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ enum class TokenType : uint8_t {
4949
Join,
5050
Left,
5151
Right,
52+
Full,
5253
Inner,
5354
Outer,
5455
Order,

include/storage/heap_table.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,12 @@ class HeapTable {
170170
*/
171171
bool physical_remove(const TupleId& tuple_id);
172172

173+
/**
174+
* @brief Resets xmax to 0 (used for rollback of a DELETE)
175+
* @return true on success
176+
*/
177+
bool undo_remove(const TupleId& tuple_id);
178+
173179
/**
174180
* @brief Replaces an existing record with new data
175181
* @param tuple_id The record to update

include/transaction/transaction.hpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
#define CLOUDSQL_TRANSACTION_TRANSACTION_HPP
88

99
#include <atomic>
10+
#include <cassert>
1011
#include <mutex>
12+
#include <optional>
1113
#include <unordered_set>
1214
#include <vector>
1315

@@ -55,6 +57,7 @@ struct UndoLog {
5557
Type type = Type::INSERT;
5658
std::string table_name;
5759
storage::HeapTable::TupleId rid;
60+
std::optional<storage::HeapTable::TupleId> old_rid;
5861
};
5962

6063
/**
@@ -120,7 +123,17 @@ class Transaction {
120123

121124
void add_undo_log(UndoLog::Type type, const std::string& table_name,
122125
const storage::HeapTable::TupleId& rid) {
123-
undo_logs_.push_back({type, table_name, rid});
126+
/* Enforce invariant: UPDATE entries must carry an old_rid, so they have to use the four-argument overload instead of this one */
127+
assert(type != UndoLog::Type::UPDATE);
128+
undo_logs_.push_back({type, table_name, rid, std::nullopt});
129+
}
130+
131+
void add_undo_log(UndoLog::Type type, const std::string& table_name,
132+
const storage::HeapTable::TupleId& rid,
133+
const storage::HeapTable::TupleId& old_rid) {
134+
/* Enforce invariant: this overload is only for UPDATE entries, which must provide old_rid */
135+
assert(type == UndoLog::Type::UPDATE);
136+
undo_logs_.push_back({type, table_name, rid, old_rid});
124137
}
125138

126139
[[nodiscard]] const std::vector<UndoLog>& get_undo_logs() const { return undo_logs_; }

include/transaction/transaction_manager.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class TransactionManager {
8282
/**
8383
* @brief Undo changes made by a transaction
8484
*/
85-
void undo_transaction(Transaction* txn);
85+
bool undo_transaction(Transaction* txn);
8686
};
8787

8888
} // namespace cloudsql::transaction

src/executor/operator.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,12 @@ bool IndexScanOperator::next(Tuple& out_tuple) {
162162
while (current_match_index_ < matching_ids_.size()) {
163163
const auto& tid = matching_ids_[current_match_index_++];
164164

165+
storage::HeapTable::TupleId rid;
166+
rid.page_num = tid.page_num;
167+
rid.slot_num = tid.slot_num;
168+
165169
storage::HeapTable::TupleMeta meta;
166-
if (table_->get_meta(tid, meta)) {
170+
if (table_->get_meta(rid, meta)) {
167171
/* MVCC Visibility Check */
168172
bool visible = true;
169173
const Transaction* const txn = get_txn();
@@ -734,7 +738,7 @@ void HashJoinOperator::add_child(std::unique_ptr<Operator> child) {
734738

735739
/* --- LimitOperator --- */
736740

737-
LimitOperator::LimitOperator(std::unique_ptr<Operator> child, uint64_t limit, uint64_t offset)
741+
LimitOperator::LimitOperator(std::unique_ptr<Operator> child, int64_t limit, int64_t offset)
738742
: Operator(OperatorType::Limit, child->get_txn(), child->get_lock_manager()),
739743
child_(std::move(child)),
740744
limit_(limit),
@@ -750,17 +754,20 @@ bool LimitOperator::open() {
750754
}
751755

752756
/* Skip offset rows */
757+
current_count_ = 0;
753758
Tuple tuple;
754-
while (current_count_ < offset_ && child_->next(tuple)) {
755-
current_count_++;
759+
if (offset_ > 0) {
760+
while (current_count_ < static_cast<uint64_t>(offset_) && child_->next(tuple)) {
761+
current_count_++;
762+
}
756763
}
757764
current_count_ = 0;
758765
set_state(ExecState::Open);
759766
return true;
760767
}
761768

762769
bool LimitOperator::next(Tuple& out_tuple) {
763-
if (current_count_ >= limit_) {
770+
if (limit_ >= 0 && current_count_ >= static_cast<uint64_t>(limit_)) {
764771
set_state(ExecState::Done);
765772
return false;
766773
}

0 commit comments

Comments
 (0)