@@ -565,19 +565,29 @@ Schema& AggregateOperator::output_schema() {
565565
566566HashJoinOperator::HashJoinOperator (std::unique_ptr<Operator> left, std::unique_ptr<Operator> right,
567567 std::unique_ptr<parser::Expression> left_key,
568- std::unique_ptr<parser::Expression> right_key)
568+ std::unique_ptr<parser::Expression> right_key,
569+ executor::JoinType join_type)
569570 : Operator(OperatorType::HashJoin, left->get_txn (), left->get_lock_manager()),
570571 left_(std::move(left)),
571572 right_(std::move(right)),
572573 left_key_(std::move(left_key)),
573- right_key_(std::move(right_key)) {
574+ right_key_(std::move(right_key)),
575+ join_type_(join_type) {
574576 /* Build resulting schema */
575577 if (left_ && right_) {
576578 for (const auto & col : left_->output_schema ().columns ()) {
577- schema_.add_column (col);
579+ auto col_meta = col;
580+ if (join_type_ == executor::JoinType::Right || join_type_ == executor::JoinType::Full) {
581+ col_meta.set_nullable (true );
582+ }
583+ schema_.add_column (col_meta);
578584 }
579585 for (const auto & col : right_->output_schema ().columns ()) {
580- schema_.add_column (col);
586+ auto col_meta = col;
587+ if (join_type_ == executor::JoinType::Left || join_type_ == executor::JoinType::Full) {
588+ col_meta.set_nullable (true );
589+ }
590+ schema_.add_column (col_meta);
581591 }
582592 }
583593}
@@ -597,62 +607,107 @@ bool HashJoinOperator::open() {
597607 auto right_schema = right_->output_schema ();
598608 while (right_->next (right_tuple)) {
599609 const common::Value key = right_key_->evaluate (&right_tuple, &right_schema);
600- hash_table_.emplace (key.to_string (), std::move (right_tuple));
610+ hash_table_.emplace (key.to_string (), BuildTuple{ std::move (right_tuple), false } );
601611 }
602612
603613 left_tuple_ = std::nullopt ;
604614 match_iter_ = std::nullopt ;
615+ left_had_match_ = false ;
616+ right_idx_iter_ = std::nullopt ;
605617 set_state (ExecState::Open);
606618 return true ;
607619}
608620
609621bool HashJoinOperator::next (Tuple& out_tuple) {
610622 auto left_schema = left_->output_schema ();
623+ auto right_schema = right_->output_schema ();
611624
612625 while (true ) {
613626 if (match_iter_.has_value ()) {
614- /* We are currently iterating through matches for a left tuple */
615627 auto & iter_state = match_iter_.value ();
616628 if (iter_state.current != iter_state.end ) {
617- const auto & right_tuple = iter_state.current ->second ;
618-
619- /* Concatenate left and right tuples */
620- if (left_tuple_.has_value ()) {
621- std::vector<common::Value> joined_values = left_tuple_->values ();
622- joined_values.insert (joined_values.end (), right_tuple.values ().begin (),
623- right_tuple.values ().end ());
629+ auto & build_tuple = iter_state.current ->second ;
630+ const auto & right_tuple = build_tuple.tuple ;
631+ std::vector<common::Value> joined_values = left_tuple_->values ();
632+ joined_values.insert (joined_values.end (), right_tuple.values ().begin (),
633+ right_tuple.values ().end ());
634+
635+ out_tuple = Tuple (std::move (joined_values));
636+ iter_state.current ++;
637+ left_had_match_ = true ;
638+ build_tuple.matched = true ;
639+ return true ;
640+ }
624641
625- out_tuple = Tuple (std::move (joined_values));
626- iter_state.current ++;
627- return true ;
642+ /* No more matches for this left tuple. If (LEFT or FULL join) and no matches found,
643+ * emit NULLs */
644+ match_iter_ = std::nullopt ;
645+ if ((join_type_ == JoinType::Left || join_type_ == JoinType::Full) &&
646+ !left_had_match_) {
647+ std::vector<common::Value> joined_values = left_tuple_->values ();
648+ for (size_t i = 0 ; i < right_schema.column_count (); ++i) {
649+ joined_values.push_back (common::Value::make_null ());
628650 }
651+ out_tuple = Tuple (std::move (joined_values));
652+ left_tuple_ = std::nullopt ;
653+ return true ;
629654 }
630- /* No more matches for this left tuple */
631- match_iter_ = std::nullopt ;
632655 left_tuple_ = std::nullopt ;
633656 }
634657
635658 /* Pull next tuple from left side */
636659 Tuple next_left;
637- if (!left_->next (next_left)) {
638- set_state (ExecState::Done);
639- return false ;
640- }
641-
642- left_tuple_ = std::move (next_left);
643- if (left_tuple_.has_value ()) {
660+ if (left_->next (next_left)) {
661+ left_tuple_ = std::move (next_left);
662+ left_had_match_ = false ;
644663 const common::Value key = left_key_->evaluate (&(left_tuple_.value ()), &left_schema);
645664
646665 /* Look up in hash table */
647666 auto range = hash_table_.equal_range (key.to_string ());
648667 if (range.first != range.second ) {
649668 match_iter_ = {range.first , range.second };
650- /* Continue loop to return the first match */
669+ } else if (join_type_ == JoinType::Left || join_type_ == JoinType::Full) {
670+ /* No match found immediately, emit NULLs if Left/Full join */
671+ std::vector<common::Value> joined_values = left_tuple_->values ();
672+ for (size_t i = 0 ; i < right_schema.column_count (); ++i) {
673+ joined_values.push_back (common::Value::make_null ());
674+ }
675+ out_tuple = Tuple (std::move (joined_values));
676+ left_tuple_ = std::nullopt ;
677+ return true ;
651678 } else {
652- /* No match for this left tuple, pull next */
679+ /* Inner/Right join and no match, skip to next left tuple */
653680 left_tuple_ = std::nullopt ;
654681 }
682+ continue ;
683+ }
684+
685+ /* Probe phase done. For RIGHT or FULL joins, scan hash table for unmatched right tuples */
686+ if (join_type_ == JoinType::Right || join_type_ == JoinType::Full) {
687+ if (!right_idx_iter_.has_value ()) {
688+ right_idx_iter_ = hash_table_.begin ();
689+ }
690+
691+ auto & it = right_idx_iter_.value ();
692+ while (it != hash_table_.end ()) {
693+ if (!it->second .matched ) {
694+ std::vector<common::Value> joined_values;
695+ for (size_t i = 0 ; i < left_schema.column_count (); ++i) {
696+ joined_values.push_back (common::Value::make_null ());
697+ }
698+ joined_values.insert (joined_values.end (), it->second .tuple .values ().begin (),
699+ it->second .tuple .values ().end ());
700+ out_tuple = Tuple (std::move (joined_values));
701+ it->second .matched = true ; /* Mark as emitted */
702+ it++;
703+ return true ;
704+ }
705+ it++;
706+ }
655707 }
708+
709+ set_state (ExecState::Done);
710+ return false ;
656711 }
657712}
658713
0 commit comments