Skip to content

Commit 841372b

Browse files
committed
Add 3-way diff highlights
1 parent 00c1d4a commit 841372b

5 files changed

Lines changed: 326 additions & 127 deletions

File tree

EvalData/models/base_models.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -850,3 +850,92 @@ def is_valid(self):
850850
return False
851851

852852
return super(TextSegmentWithTwoTargets, self).is_valid()
853+
854+
855+
def compute_char_diff_map(text_a, text_b, char_based=False):
    """
    Compute per-character diff maps between two texts using SequenceMatcher.

    Returns (diff_map_a, diff_map_b) where each is a list with one entry per
    character of the corresponding text. Entries are None (no diff), "sub"
    (part of a replaced span), "ins" (present only in text_b) or "del"
    (present only in text_a).

    In word mode, the whitespace that follows a word (up to the start of the
    next word) is labelled together with that word; whitespace before the
    first word and after the last word is never labelled.

    Args:
        text_a: First text (plain, already HTML-escaped if needed).
        text_b: Second text.
        char_based: If True, diff at character level. If False, diff at word
            level (whitespace-separated tokens) and expand back to character
            positions.
    """
    # Imported locally so the helper is self-contained regardless of what the
    # enclosing module imports at file level.
    from difflib import SequenceMatcher

    if not text_a and not text_b:
        return ([], [])
    if not text_a:
        return ([], ["ins"] * len(text_b))
    if not text_b:
        return (["del"] * len(text_a), [])

    def token_spans(text, tokens):
        """Return one (start, end) character span per whitespace token."""
        starts = []
        cursor = 0
        for tok in tokens:
            begin = text.index(tok, cursor)
            starts.append(begin)
            cursor = begin + len(tok)
        # Each span runs up to the start of the next token so the separating
        # whitespace is included; the last span stops at the token's own end
        # (``cursor``). With no tokens, zip() yields an empty list.
        ends = starts[1:] + [cursor]
        return list(zip(starts, ends))

    def mark(diff_map, spans, lo, hi, label):
        """Label every character covered by spans[lo:hi]."""
        for start, end in spans[lo:hi]:
            diff_map[start:end] = [label] * (end - start)

    if char_based:
        toks_a = list(text_a)
        toks_b = list(text_b)
        # One token per character, so every span is a single position.
        spans_a = [(i, i + 1) for i in range(len(text_a))]
        spans_b = [(i, i + 1) for i in range(len(text_b))]
    else:
        toks_a = text_a.split()
        toks_b = text_b.split()
        spans_a = token_spans(text_a, toks_a)
        spans_b = token_spans(text_b, toks_b)

    # autojunk must be off: with the default heuristic, once the second
    # sequence has 200+ items every item occurring in more than ~1% of the
    # positions is dropped from the match index. In character mode that
    # removes common letters and spaces, and can turn two nearly identical
    # sentences into a single full-length "replace".
    matcher = SequenceMatcher(None, toks_a, toks_b, autojunk=False)

    diff_map_a = [None] * len(text_a)
    diff_map_b = [None] * len(text_b)

    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == 'replace':
            mark(diff_map_a, spans_a, i1, i2, "sub")
            mark(diff_map_b, spans_b, j1, j2, "sub")
        elif tag == 'insert':
            mark(diff_map_b, spans_b, j1, j2, "ins")
        elif tag == 'delete':
            mark(diff_map_a, spans_a, i1, i2, "del")
        # 'equal' opcodes leave the default None entries untouched.

    return (diff_map_a, diff_map_b)

EvalData/models/contrastive_assessment_document.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,34 @@ def context_right(self, first=5, separator=' '):
127127
def is_valid(self):
    """Delegate validation to the parent class and return its verdict."""
    parent = super(TextSegmentWithThreeTargetsWithContext, self)
    return parent.is_valid()
129129

130+
def compute_pairwise_diff_maps(self, char_based=False):
    """
    Compute per-character diff maps for all 3 translation pairs.

    Returns a dict with 6 JSON-encoded diff map strings, keyed
    "diff_1vs2", "diff_2vs1", "diff_1vs3", "diff_3vs1", "diff_2vs3" and
    "diff_3vs2". The value under "diff_XvsY" is the map over the characters
    of translation X when compared against translation Y: a JSON array with
    one entry per character, each null (no diff), "sub", "ins", or "del".

    Missing or empty target texts are treated as empty strings.

    Args:
        char_based: Passed through to compute_char_diff_map; True diffs at
            character level, False at word level.
    """
    import json
    from EvalData.models.base_models import compute_char_diff_map

    texts = {
        1: self.target1Text or '',
        2: self.target2Text or '',
        3: self.target3Text or '',
    }

    encoded = {}
    for left, right in ((1, 2), (1, 3), (2, 3)):
        left_map, right_map = compute_char_diff_map(
            texts[left], texts[right], char_based=char_based
        )
        encoded['diff_%dvs%d' % (left, right)] = json.dumps(left_map)
        encoded['diff_%dvs%d' % (right, left)] = json.dumps(right_map)
    return encoded
130158

131159
@AnnotationTaskRegistry.register
132160
class ContrastiveAssessmentDocumentTask(BaseMetadata):

EvalView/static/EvalView/js/contrastive-assessment-document.js

Lines changed: 150 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -134,86 +134,6 @@ function submit_contrastive_row($row, $itemBox1, $itemBox2, $itemBox3) {
134134
});
135135
}
136136

137-
/**
 * Finish-document handler for contrastive mode.
 *
 * Steps, in order:
 *   1. If document comments are required, refuse to continue while the
 *      comment box exists and is blank (shows an error, returns false).
 *   2. Unless override_tutorial_check is truthy, run validate_form() on the
 *      MQM handler of every candidate in every .contrastive-row; the first
 *      failure aborts with false.
 *   3. Disable the "next document" button, stamp end_timestamp, and submit
 *      the rows one after another via submit_contrastive_row().
 *   4. Copy the document comment into the hidden form (when enabled) and
 *      submit #form-next-doc.
 *
 * If any row submission throws, the error is logged and the button is
 * re-enabled after a 5 second pause so the user can retry.
 *
 * @param {boolean} [override_tutorial_check=false] Skip per-form validation.
 * @returns {Promise<boolean|undefined>} false when validation blocks the
 *     submission, otherwise undefined.
 */
async function submit_finish_document_contrastive(override_tutorial_check) {
    override_tutorial_check = override_tutorial_check || false;

    // Validate document comment if required
    if (typeof commentsDocRequired !== 'undefined' && commentsDocRequired) {
        var $requiredComment = $('#doc-comment-input');
        if ($requiredComment.length && !$requiredComment.val().trim()) {
            var missingCommentMessage = 'Please provide a document comment before submitting.';
            if (typeof _show_error_box === 'function') {
                _show_error_box(missingCommentMessage, 4000);
            } else {
                alert(missingCommentMessage);
            }
            return false;
        }
    }

    // Get all contrastive rows
    var $rows = $('.contrastive-row');

    // Validate all forms if not skipping tutorial check
    if (!override_tutorial_check && typeof MQM_HANDLERS !== 'undefined') {
        var allValid = true;
        $rows.each(function() {
            var $row = $(this);
            var itemId = $row.data('item-id');

            for (var k = 1; k <= NUM_CANDIDATES; k++) {
                var $itemBox = $row.find('#item-' + itemId + '-' + k);
                var handler = MQM_HANDLERS[$itemBox.data('item-id')];
                if (handler && !handler.validate_form()) {
                    allValid = false;
                    // Returning false from the callback stops .each(), so no
                    // later row is validated once one form has failed.
                    return false;
                }
            }
        });

        if (!allValid) {
            return false;
        }
    }

    // Prevent multiclicks
    $("#button-next-doc").prop('disabled', true);

    $("#form-next-doc > input[name='end_timestamp']").val(Date.now() / 1000);

    // Submit each row sequentially; each submission is awaited before the next
    try {
        for (var i = 0; i < $rows.length; i++) {
            var $currentRow = $($rows[i]);
            var currentId = $currentRow.data('item-id');

            await submit_contrastive_row(
                $currentRow,
                $currentRow.find('#item-' + currentId + '-1'),
                $currentRow.find('#item-' + currentId + '-2'),
                $currentRow.find('#item-' + currentId + '-3')
            );
        }

        // Add document comment to hidden form if enabled
        if (typeof commentsDocEnabled !== 'undefined' && commentsDocEnabled) {
            var $docComment = $('#doc-comment-input');
            if ($docComment.length) {
                var $commentInput = $("#form-next-doc").find('input[name="comment"]');
                if ($commentInput.length) {
                    $commentInput.val($docComment.val());
                } else {
                    $('<input>').attr({type: 'hidden', name: 'comment', value: $docComment.val()}).appendTo('#form-next-doc');
                }
            }
        }
        $("#form-next-doc").trigger("submit");
    } catch (error) {
        console.error('Error submitting contrastive items:', error);
        // Keep the button locked briefly before allowing a retry.
        await new Promise(function(resolve) { setTimeout(resolve, 5000); });
        $("#button-next-doc").prop('disabled', false);
    }
}
216-
217137
// Initialize when DOM is ready
218138
$(document).ready(function() {
219139
// Check if toggle functionality should be enabled
@@ -373,12 +293,6 @@ $(document).ready(function() {
373293
});
374294
});
375295

376-
// Override the button-next-doc handler
377-
$("#button-next-doc").off("click");
378-
$("#button-next-doc").on("click", function() {
379-
submit_finish_document_contrastive(false);
380-
});
381-
382296
// Sync document comment to hidden input on doc form submit
383297
$("#button-doc").on("click", function() {
384298
var $form = $(this).closest('form');
@@ -408,7 +322,7 @@ $(document).ready(function() {
408322
$("#skip-tutorial").prop('disabled', true);
409323
$(".button-submit-contrastive").trigger("click");
410324
$(".slider").slider('value', 0);
411-
submit_finish_document_contrastive(true);
325+
$("#button-doc").trigger("click");
412326
});
413327
});
414328

@@ -492,3 +406,152 @@ function initializeContrastiveRows() {
492406
}
493407
});
494408
}
409+
410+
411+
// ============================================================
412+
// Contrastive diff highlighting on hover
413+
// ============================================================
414+
415+
/**
 * Wrap plain text inside a target-text div with per-character spans for SQM mode.
 * ESA mode already has .mqm_char spans from MQMItemHandler, so an element that
 * already contains .mqm_char or .diff-char children is left untouched.
 *
 * Every character except '\n' becomes
 *   <span class="diff-char" char_id="N">c</span>
 * with N counting from 0; each '\n' becomes a <br> and does not consume an index.
 *
 * The text is walked by Unicode code point (not UTF-16 code unit) so that a
 * character outside the BMP stays in a single span and span positions line up
 * with diff maps that hold one entry per code point. Characters that are
 * special in HTML are escaped before being placed into the markup.
 *
 * @param {jQuery} $targetText The .target-text element(s) to rewrite in place.
 */
function wrapTargetTextInCharSpans($targetText) {
    // Skip if already wrapped (ESA mode or already processed)
    if ($targetText.children('.mqm_char').length > 0 || $targetText.children('.diff-char').length > 0) {
        return;
    }

    var HTML_ESCAPES = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    };

    var pieces = [];
    var charIndex = 0;
    // Array.from iterates by code point, keeping surrogate pairs together.
    Array.from($targetText.text()).forEach(function(ch) {
        if (ch === '\n') {
            pieces.push('<br>');
            return;
        }
        var safe = Object.prototype.hasOwnProperty.call(HTML_ESCAPES, ch) ? HTML_ESCAPES[ch] : ch;
        pieces.push('<span class="diff-char" char_id="' + charIndex + '">' + safe + '</span>');
        charIndex++;
    });
    $targetText.html(pieces.join(''));
}
438+
439+
/**
 * Read which translation an item-box holds from its data-item-id attribute.
 * The attribute looks like "XXXXX-N"; the final character N ("1", "2" or "3")
 * is returned as a string. A missing or empty attribute yields "".
 */
function getTranslationNumber($itemBox) {
    var rawId = $itemBox.attr('data-item-id');
    if (!rawId) {
        return '';
    }
    return rawId.charAt(rawId.length - 1);
}
447+
448+
/**
 * Collect the per-character spans of an item-box's .target-text.
 * Prefers ESA-style .mqm_char children (ignoring those marked .span_missing);
 * when there are none, falls back to the SQM-style .diff-char children.
 */
function getCharSpans($itemBox) {
    var $container = $itemBox.find('.target-text');
    var $mqmSpans = $container.children('.mqm_char').not('.span_missing');
    return $mqmSpans.length === 0 ? $container.children('.diff-char') : $mqmSpans;
}
459+
460+
/**
 * Highlight char spans according to a diff map.
 *
 * For every index whose diff-map entry is truthy, the matching span receives
 * cssClass ('contrastive-diff-vs-second' or 'contrastive-diff-vs-third').
 * If the span already carries the opposite of those two classes, that class
 * is swapped for 'contrastive-diff-both'; a span already marked
 * 'contrastive-diff-both' is left unchanged. Only the first
 * min($chars.length, diffMap.length) positions are considered.
 */
function applyDiffClasses($chars, diffMap, cssClass) {
    if (!diffMap || !$chars.length) return;

    var VS_SECOND = 'contrastive-diff-vs-second';
    var VS_THIRD = 'contrastive-diff-vs-third';
    var VS_BOTH = 'contrastive-diff-both';

    // The class that, if already present, means both comparisons differ here.
    var counterpart = null;
    if (cssClass === VS_SECOND) {
        counterpart = VS_THIRD;
    } else if (cssClass === VS_THIRD) {
        counterpart = VS_SECOND;
    }

    var limit = Math.min($chars.length, diffMap.length);
    for (var idx = 0; idx < limit; idx++) {
        if (!diffMap[idx]) {
            continue;
        }
        var $span = $chars.eq(idx);
        if (counterpart !== null && $span.hasClass(counterpart)) {
            $span.removeClass(counterpart).addClass(VS_BOTH);
        } else if (!$span.hasClass(VS_BOTH)) {
            $span.addClass(cssClass);
        }
    }
}
483+
484+
/**
 * Strip every contrastive diff highlight class from the elements inside a row.
 */
function clearDiffClasses($row) {
    var diffClasses = [
        'contrastive-diff-vs-second',
        'contrastive-diff-vs-third',
        'contrastive-diff-both'
    ];
    var selector = diffClasses
        .map(function(name) { return '.' + name; })
        .join(', ');
    $row.find(selector).removeClass(diffClasses.join(' '));
}
491+
492+
/**
 * Wire up the hover-driven diff highlighting for every contrastive row.
 * Intended to be called once at page load.
 *
 * On mouseenter of an item-box (only while <body> has the
 * 'show-contrastive-diffs' class) the two other item-boxes of the same row are
 * looked up and ordered by translation number. The hovered box is highlighted
 * against each of them, and each of them is highlighted against the hovered
 * box, using the diff maps stored in the boxes' data-diff-vs-N attributes.
 * On mouseleave all highlights in the row are removed.
 */
function initContrastiveDiffHover() {
    var BOX_SELECTOR = '.contrastive-row .item-box';
    var CLASS_VS_SECOND = 'contrastive-diff-vs-second';
    var CLASS_VS_THIRD = 'contrastive-diff-vs-third';

    function byTranslationNumber($left, $right) {
        return getTranslationNumber($left).localeCompare(getTranslationNumber($right));
    }

    function onEnter() {
        if (!$('body').hasClass('show-contrastive-diffs')) {
            return;
        }

        var $hoveredBox = $(this);
        var $row = $hoveredBox.closest('.contrastive-row');
        var hoveredNum = getTranslationNumber($hoveredBox);

        // Every item-box of this row that shows a different translation
        var others = $row.find('.item-box').toArray()
            .map(function(element) { return $(element); })
            .filter(function($box) { return getTranslationNumber($box) !== hoveredNum; });
        if (others.length < 2) {
            return;
        }
        others.sort(byTranslationNumber);

        var $otherY = others[0]; // "second" = yellow
        var $otherZ = others[1]; // "third" = teal

        // Diff maps of the hovered text against each of the other two
        var diffVsY = $hoveredBox.data('diff-vs-' + getTranslationNumber($otherY));
        var diffVsZ = $hoveredBox.data('diff-vs-' + getTranslationNumber($otherZ));

        // Diff maps of the other two texts against the hovered one
        var diffYvsHovered = $otherY.data('diff-vs-' + hoveredNum);
        var diffZvsHovered = $otherZ.data('diff-vs-' + hoveredNum);

        // Start from a clean slate before painting new highlights
        clearDiffClasses($row);

        // Hovered box: yellow for vs-Y, teal for vs-Z, green where both apply
        var $hoveredChars = getCharSpans($hoveredBox);
        applyDiffClasses($hoveredChars, diffVsY, CLASS_VS_SECOND);
        applyDiffClasses($hoveredChars, diffVsZ, CLASS_VS_THIRD);

        // Other boxes reuse the colour of their comparison with the hovered box
        applyDiffClasses(getCharSpans($otherY), diffYvsHovered, CLASS_VS_SECOND);
        applyDiffClasses(getCharSpans($otherZ), diffZvsHovered, CLASS_VS_THIRD);
    }

    function onLeave() {
        clearDiffClasses($(this).closest('.contrastive-row'));
    }

    // Wrap SQM target text in char spans if needed
    $(BOX_SELECTOR).each(function() {
        wrapTargetTextInCharSpans($(this).find('.target-text'));
    });

    // Hover handlers on each item-box within a contrastive row
    $(BOX_SELECTOR).on('mouseenter', onEnter);
    $(BOX_SELECTOR).on('mouseleave', onLeave);
}

0 commit comments

Comments
 (0)