From 3000862b8695d2fe2ebe051e452321f0c4e049f8 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Thu, 4 Apr 2024 20:46:47 +0200
Subject: [PATCH 1/9] fix(qif): restored correct handling of 'date_fmt', better
 CSV normalization with csvtrim

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 csv2ofx/utilz/csvtrim | 132 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 115 insertions(+), 17 deletions(-)

diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim
index 68ea3a0..ed31033 100755
--- a/csv2ofx/utilz/csvtrim
+++ b/csv2ofx/utilz/csvtrim
@@ -8,12 +8,39 @@
 ################################################################################
 [ "$DEBUG" ] && set -x
 set -o pipefail
+shopt -s expand_aliases
 
 __author__='Marco "sphakka" Poleggi'
+__version__='0.1.1'
 
 myself=$(basename $0)
+
+# defaults
 dfields='4,6,9,12-16,19-21'
-dseparator=';'
+dfs=';'
+dtrmtln=3
+
+# alternatives to use for quoted fields with rogue fs characters
+declare -A fs_replacements=(
+    [;]=','
+    [:]=','
+    [,]=';'
+)
+
+alias log_error='echo >&2 "[error] ${FUNCNAME}>"'
+alias log_info='echo >&2 "[info] ${FUNCNAME}>"'
+alias log_warn='echo >&2 "[warn] ${FUNCNAME}>"'
+
+tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
+    log_error "Can't create tmp file"
+    exit 1
+}
+
+tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
+    log_error "Can't create file"
+    exit 1
+}
+
 
 ################################################################################
 
@@ -21,13 +48,13 @@ usage() {
     echo >&2 "
 Usage:
 
-  $myself CSV_FILE [FIELDS [SEPARATOR]]
+  $myself CSV_FILE [FIELDS [FS]]
 
 where
 
-  CSV_FILE:  path to an existing file or '-' for stdin
-  FIELDS:    cut-style list of fields to keep. Default: '$dfields'
-  SEPARATOR: a single (escaped) character. Default: '$dseparator'
+  CSV_FILE  path to an existing file or '-' for stdin
+  FIELDS    cut-style list of fields to keep. Default: '$dfields'
+  FS        a single (escaped) character as field separator. Default: '$dfs'
 
 (default values are for exports from UBS CH (DE/FR/IT))
 
@@ -39,41 +66,112 @@ e.g.
   exit 1
 }
 
-trap '[ $? -ne 0 ] && usage' EXIT
+function cleanup() {
+    [ $? -ne 0 ] && usage
+    [[ "$DEBUG" ]] && log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \
+            || rm -f $tmp_file1 $tmp_file2
+}
+
+
+# trap '[ $? -ne 0 ] && usage' EXIT
+trap cleanup EXIT SIGINT SIGTERM
 
 input_csv=${1:?'arg #1 missing: input CSV file'}
 fields=${2:-$dfields}
-separator=${3:-$dseparator}
+fs=${3:-$dfs}
+trmtln=${3:-$dtrmtln}
 
 
-function _trim () {
-    local dlmtrc=${1:?'arg #1 missing: delimiter character'}
+# Count the number of field separator in a string
+function count_fs() {
+    local string=${1:?'arg #1 missing: input string'}
+    local fs=${2:?'arg #2 missing: field separator'}
+
+    fsn=$(echo "${string}" | sed -nr "s/${fs}/\n/g p" | wc -l) || {
+        log_error "sed filter failed"
+        return  1
+    }
+    echo $((fsn - 1))
+}
+
+
+function trim () {
+    local fs=${1:?'arg #1 missing: field delimiter character'}
     local fields=${2:?'arg #2 missing: cut-style fields to keep'}
     local incsvf=${3:?'arg #3 missing: input CSV file'}
-    local trmtln=${4:-'3'} # number of trailing lines to trim
+    local trmtln=${4:?'arg #4 missing: number of trailing lines to trim'}
 
     local head_opts=
 
     if [ "$trmtln" ]; then
         [[ "$trmtln" =~ ^[[:digit:]]+$ ]] || {
-            echo >&2 "[error] ${trmtln}: number of traling lines to trim is not an integer"
+            log_error "${trmtln}: number of traling lines to trim is not an integer"
             return  1
         }
         head_opts="-n-${trmtln}"
     fi
 
+    # escape any separator characters that might appear in quoted fields (yep,
+    # that's legal for CSV files) --
+    # <https://unix.stackexchange.com/questions/48672/only-remove-commas-embedded-within-quotes-in-a-comma-delimited-file>.
+    # Quotes are remove as well.
+    local quotes="\"'"
+    local fs_repl=${fs_replacements[${fs}]} || {
+        log_error "[bug] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array"
+        return  1
+    }
+    sed -r ":a;
+            s/([${quotes}])([^${quotes}${fs}]*)${fs}(.*?)([${quotes}])/\1\2${fs_repl}\3\4/;
+            ta;
+            s/[${quotes}]//g" $incsvf > $tmp_file1 || {
+        log_error "can't treat quoted field(s)"
+        return  1
+    }
+
+    # normalize field number where possible -- remove trailing fs, append
+    # fs. The correct number of fields is inferred by the first (supposedly
+    # the header) line.
+    local header=$(head -n1 $tmp_file1 | sed -r "s/${fs}*$//g")
+    log_info "header (possibly fixed): ${header}"
+    fsn=$(count_fs "${header}" $fs) || {
+        log_error "can't compute the header's field number"
+        return  1
+    }
+    log_info "header FS count: ${fsn}"
+
+    echo $header > $tmp_file2
+
+    tail -n+2 $tmp_file1 | while read line; do
+        local lfs=$(count_fs "${line}" $fs)
+        local fsd=$(( $lfs - $fsn ))
+        # ~abs
+        local afsd=${fsd#-}
+        # repeat extra fs
+        local xfs=$(printf "${fs}%.0s" $(eval "echo {1..${afsd}}"))
+        if [[  $fsd -lt 0 ]]; then
+            fsd=${fsd#-}
+            line+=$xfs
+            log_info "'${line}': fixed, +${fsd} FS"
+        elif [[  $fsd -gt 0 ]]; then
+            # trim at the tail
+            line=${line%${xfs}}
+            log_info "'${line}': fixed, -${fsd} FS"
+        fi
+        echo $line >> $tmp_file2
+    done
+
     # trnxs detailed as "Solde prix prestations" are split with a
     # "Sous-montant" value, but empty "Débit; Crédit; Solde" columns (the
     # trailing three). To avoid breaking csv2ofx, these must be filtered
-    # out... The kludge is to skip rows ending with 3 consecutive delimiter chars
-    head $head_opts $incsvf | cut -d$dlmtrc -f$fields | \
-        sed -nr "/${dlmtrc}${dlmtrc}${dlmtrc}\s*$/ !p" || {
-            echo >&2 "[error] ${incsvf}: can't filter input file"
+    # out... The kludge is to skip rows ending with an empty field
+    head $head_opts $tmp_file2 | cut -d$fs -f$fields | \
+        sed -nr "/${fs}\s*$/ !p" || {
+            log_error "can't filter input file"
             return  1
         }
 }
 
-_trim $separator $fields $input_csv || {
-    echo >&2 "[error] ${input_csv}: trimming failed..."
+trim $fs $fields $input_csv $trmtln || {
+    log_error "${input_csv}: trimming failed..."
     exit 1
 }

From 7432ca83076b85cc8556a04af987b7ae3219bf29 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Fri, 5 Apr 2024 12:41:21 +0200
Subject: [PATCH 2/9] dev(utilz): self-testing capabilities for csvtrim

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 csv2ofx/utilz/csvtrim | 48 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim
index ed31033..6d42c5a 100755
--- a/csv2ofx/utilz/csvtrim
+++ b/csv2ofx/utilz/csvtrim
@@ -6,7 +6,7 @@
 #
 # TO-DO: integrate in csv2ofx?
 ################################################################################
-[ "$DEBUG" ] && set -x
+[[ "$CSVTRIM_DEBUG" ]] && set -x
 set -o pipefail
 shopt -s expand_aliases
 
@@ -16,9 +16,9 @@ __version__='0.1.1'
 myself=$(basename $0)
 
 # defaults
-dfields='4,6,9,12-16,19-21'
-dfs=';'
-dtrmtln=3
+dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'}
+dfs=${CSVTRIM_DFS:-';'}
+dtrmtln=${CSVTRIM_DTRMTLN-3}
 
 # alternatives to use for quoted fields with rogue fs characters
 declare -A fs_replacements=(
@@ -42,19 +42,46 @@ tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
 }
 
 
+_test_data=$(cat <<'EOF'
+header;with;four;fields;;
+too;many;fields;here;;;
+too;'many; also';there;;;
+"too,few; fields";here
+line;is;a;split
+split;1
+split;2;;;
+this;line;is;OK
+and;this too;is;right
+EOF
+)
+
+[[ $CSVTRIM_SELFTEST ]] && {
+    unset CSVTRIM_SELFTEST
+    cmd="$0 - 1- ; 0"
+    log_info "Self-testing with command '${cmd}'"
+    echo "${_test_data}" | exec $cmd && {
+        log_info "Self-test OK :-)"
+        exit 0
+    }
+    log_info "Self-test KO :-("
+    exit 1
+}
+
+
 ################################################################################
 
 usage() {
     echo >&2 "
 Usage:
 
-  $myself CSV_FILE [FIELDS [FS]]
+  $myself CSV_FILE [FIELDS [FS [TRIM_NLINES]]]
 
 where
 
-  CSV_FILE  path to an existing file or '-' for stdin
-  FIELDS    cut-style list of fields to keep. Default: '$dfields'
-  FS        a single (escaped) character as field separator. Default: '$dfs'
+  CSV_FILE      path to an existing file or '-' for stdin
+  FIELDS        cut-style list of fields to keep. Default: '$dfields'
+  FS            a single (escaped) character as field separator. Default: '$dfs'
+  TRIM_NLINES   discard N trailing lines
 
 (default values are for exports from UBS CH (DE/FR/IT))
 
@@ -68,7 +95,8 @@ e.g.
 
 function cleanup() {
     [ $? -ne 0 ] && usage
-    [[ "$DEBUG" ]] && log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \
+    [[ "$DEBUG" ]] && \
+        log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \
             || rm -f $tmp_file1 $tmp_file2
 }
 
@@ -79,7 +107,7 @@ trap cleanup EXIT SIGINT SIGTERM
 input_csv=${1:?'arg #1 missing: input CSV file'}
 fields=${2:-$dfields}
 fs=${3:-$dfs}
-trmtln=${3:-$dtrmtln}
+trmtln=${4:-$dtrmtln}
 
 
 # Count the number of field separator in a string

From bdaecd4b79dc159f6417bec0fa913f6ddc198496 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Tue, 16 Jul 2024 14:21:22 +0200
Subject: [PATCH 3/9] dev(utilz): csvtrim reworked around getopt with fixes for
 single quotes. +output sorting

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 csv2ofx/utilz/csvtrim | 277 +++++++++++++++++++++++++++++++-----------
 1 file changed, 204 insertions(+), 73 deletions(-)

diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim
index 6d42c5a..9f85e0d 100755
--- a/csv2ofx/utilz/csvtrim
+++ b/csv2ofx/utilz/csvtrim
@@ -6,7 +6,7 @@
 #
 # TO-DO: integrate in csv2ofx?
 ################################################################################
-[[ "$CSVTRIM_DEBUG" ]] && set -x
+[[ "$CSVTRIM_TRACE" ]] && set -x
 set -o pipefail
 shopt -s expand_aliases
 
@@ -18,7 +18,17 @@ myself=$(basename $0)
 # defaults
 dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'}
 dfs=${CSVTRIM_DFS:-';'}
-dtrmtln=${CSVTRIM_DTRMTLN-3}
+dtrim_trail=${CSVTRIM_DTRIM_TRAIL-3}
+dsort_key1=${CSVTRIM_DSORT_KEY1-4}
+dsort_key2=${CSVTRIM_DSORT_KEY2-8}
+
+# option vars
+fields=${dfields}
+fs=${dfs}
+trim_trail=${dtrim_trail}
+sort_key1=${dsort_key1}
+sort_key2=${dsort_key2}
+
 
 # alternatives to use for quoted fields with rogue fs characters
 declare -A fs_replacements=(
@@ -27,9 +37,47 @@ declare -A fs_replacements=(
     [,]=';'
 )
 
+# CSV columns' index-label crossref for UBS-FR -- just informational for
+# now. Sorting is indeed made on numerical keys.
+declare -A cols_in_ubs_fr=(
+    [4]='Produit'
+    [6]='Monn.'
+    [9]='Description'
+    [12]='Date de valeur'
+    [13]='Description 1'
+    [14]='Description 2'
+    [15]='Description 3'
+    [16]='N° de transaction'
+    [19]='Débit'
+    [20]='Crédit'
+    [21]='Solde'
+)
+
+declare -A cols_out_ubs_fr=(
+    [1]='Produit'
+    [2]='Monn.'
+    [3]='Description'
+    [4]='Date de valeur'
+    [5]='Description 1'
+    [6]='Description 2'
+    [7]='Description 3'
+    [8]='N° de transaction'
+    [9]='Débit'
+    [10]='Crédit'
+    [11]='Solde'
+)
+# Primary and secondary output sorting key indices
+sort_key1=$dsort_key1
+sort_key2=$dsort_key2
+
 alias log_error='echo >&2 "[error] ${FUNCNAME}>"'
 alias log_info='echo >&2 "[info] ${FUNCNAME}>"'
 alias log_warn='echo >&2 "[warn] ${FUNCNAME}>"'
+if [[ $CSVTRIM_DEBUG ]]; then
+    alias log_debug='echo >&2 "[debug] ${FUNCNAME}>"'
+else
+    alias log_debug=':'
+fi
 
 tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
     log_error "Can't create tmp file"
@@ -42,73 +90,83 @@ tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
 }
 
 
+# 6 columns. Use CSVTRIM_DSORT_KEY1=1 CSVTRIM_DSORT_KEY2=2 for self-testing
 _test_data=$(cat <<'EOF'
-header;with;four;fields;;
-too;many;fields;here;;;
-too;'many; also';there;;;
-"too,few; fields";here
-line;is;a;split
-split;1
-split;2;;;
-this;line;is;OK
-and;this too;is;right
+date;trxnid;header;with;six;fields;;
+02.03.2024;txn-07;will stay;with;3 fields;less;;;
+01.02.2024;txn-01;will;"stay; also";with 1;field less;
+01.02.2024;txn-03;"too,few; fields";here
+01.02.2024;txn-02;this split;will stay;;1'234.5
+01.02.2024;txn-02;comment is removed;;;
+01.02.2024;txn-02;split-1 is removed
+01.02.2024;txn-02;split-2 is removed;;;
+04.03.2024;txn-05;this line;is;really;OK
+07.01.2024;txn-04;this too short by one;removed; as a split
+08.02.2024;txn-06;thousand separator; is not a quotation mark;1'234.5;6'789.0
 EOF
 )
 
-[[ $CSVTRIM_SELFTEST ]] && {
-    unset CSVTRIM_SELFTEST
-    cmd="$0 - 1- ; 0"
-    log_info "Self-testing with command '${cmd}'"
-    echo "${_test_data}" | exec $cmd && {
-        log_info "Self-test OK :-)"
-        exit 0
-    }
-    log_info "Self-test KO :-("
-    exit 1
-}
+_test_results=$(cat <<'EOF'
+date;trxnid;header;with;six;fields
+01.02.2024;txn-01;will;stay, also;with 1;field less
+01.02.2024;txn-02;this split;will stay;;1'234.5
+08.02.2024;txn-06;thousand separator; is not a quotation mark;1'234.5;6'789.0
+02.03.2024;txn-07;will stay;with;3 fields;less
+04.03.2024;txn-05;this line;is;really;OK
+EOF
+)
 
 
 ################################################################################
 
 usage() {
-    echo >&2 "
+    cat >&2 <<EOF
 Usage:
 
-  $myself CSV_FILE [FIELDS [FS [TRIM_NLINES]]]
+    ${myself} [OPTIONS] CSV_FILE
+
+where:
+
+    CSV_FILE      path to the input CSV file or '-' for stdin
+
+Options:
+
+(defaults are in parentheses and are valid for exports from UBS CH DE/FR/IT)
 
-where
+    -h  print this help
+    -f  cut-style list of fields to keep ('${dfields}')
+    -s  a single (escaped) character as field separator ('${dfs}')
+    -t  discard N trailing lines ('${trim_trail}')
+    -1  primary output sort key ('${dsort_key1}')
+    -2  secondary output sort key ('${dsort_key2}')
 
-  CSV_FILE      path to an existing file or '-' for stdin
-  FIELDS        cut-style list of fields to keep. Default: '$dfields'
-  FS            a single (escaped) character as field separator. Default: '$dfs'
-  TRIM_NLINES   discard N trailing lines
 
-(default values are for exports from UBS CH (DE/FR/IT))
+Examples:
 
-e.g.
+    $myself -f1,3,5-8 -s\; hairy_export.csv
+    cat hairy_export.csv | $myself -f1,3,5-8 -s\; -
 
-  $myself hairy_export.csv 1,3,5-8 \;
-  cat hairy_export.csv | $myself - 1,3,5-8 \;"
 
-  exit 1
+Selftest:
+
+    CSVTRIM_SELFTEST=1 csvtrim ...
+
+
+Debug:
+
+    CSVTRIM_DEBUG=1 [CSVTRIM_TRACE=1] csvtrim ...
+
+EOF
 }
 
 function cleanup() {
-    [ $? -ne 0 ] && usage
-    [[ "$DEBUG" ]] && \
-        log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \
+    [[ "${CSVTRIM_DEBUG}" ]] && \
+        log_info "Tmp files kept: tmp_file1='${tmp_file1}', tmp_file2='${tmp_file2}'" \
             || rm -f $tmp_file1 $tmp_file2
 }
 
-
-# trap '[ $? -ne 0 ] && usage' EXIT
 trap cleanup EXIT SIGINT SIGTERM
 
-input_csv=${1:?'arg #1 missing: input CSV file'}
-fields=${2:-$dfields}
-fs=${3:-$dfs}
-trmtln=${4:-$dtrmtln}
-
 
 # Count the number of field separator in a string
 function count_fs() {
@@ -116,7 +174,7 @@ function count_fs() {
     local fs=${2:?'arg #2 missing: field separator'}
 
     fsn=$(echo "${string}" | sed -nr "s/${fs}/\n/g p" | wc -l) || {
-        log_error "sed filter failed"
+        log_error "Sed filter failed"
         return  1
     }
     echo $((fsn - 1))
@@ -127,32 +185,35 @@ function trim () {
     local fs=${1:?'arg #1 missing: field delimiter character'}
     local fields=${2:?'arg #2 missing: cut-style fields to keep'}
     local incsvf=${3:?'arg #3 missing: input CSV file'}
-    local trmtln=${4:?'arg #4 missing: number of trailing lines to trim'}
+    local trim_trail=${4:?'arg #4 missing: number of trailing lines to trim'}
 
     local head_opts=
 
-    if [ "$trmtln" ]; then
-        [[ "$trmtln" =~ ^[[:digit:]]+$ ]] || {
-            log_error "${trmtln}: number of traling lines to trim is not an integer"
+    if [ "$trim_trail" ]; then
+        [[ "$trim_trail" =~ ^[[:digit:]]+$ ]] || {
+            log_error "${trim_trail}: number of traling lines to trim is not an integer"
             return  1
         }
-        head_opts="-n-${trmtln}"
+        head_opts="-n-${trim_trail}"
     fi
 
     # escape any separator characters that might appear in quoted fields (yep,
     # that's legal for CSV files) --
     # <https://unix.stackexchange.com/questions/48672/only-remove-commas-embedded-within-quotes-in-a-comma-delimited-file>.
-    # Quotes are remove as well.
-    local quotes="\"'"
+    # Double quotes are removed as well. Single quotes must stay becasue
+    # adjacent financial-style numerals such as 1'234.5;6'789.0 would make a
+    # fake quoted string '234.5;6'.
+    local quotes="\""
     local fs_repl=${fs_replacements[${fs}]} || {
-        log_error "[bug] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array"
+        log_error "[BUG] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array"
         return  1
     }
-    sed -r ":a;
+    head $head_opts $incsvf | \
+        sed -r ":a;
             s/([${quotes}])([^${quotes}${fs}]*)${fs}(.*?)([${quotes}])/\1\2${fs_repl}\3\4/;
             ta;
-            s/[${quotes}]//g" $incsvf > $tmp_file1 || {
-        log_error "can't treat quoted field(s)"
+            s/[${quotes}]//g" > $tmp_file1 || {
+        log_error "Can't treat quoted field(s)"
         return  1
     }
 
@@ -160,46 +221,116 @@ function trim () {
     # fs. The correct number of fields is inferred by the first (supposedly
     # the header) line.
     local header=$(head -n1 $tmp_file1 | sed -r "s/${fs}*$//g")
-    log_info "header (possibly fixed): ${header}"
+    log_info "Header (possibly fixed): '${header}'"
     fsn=$(count_fs "${header}" $fs) || {
         log_error "can't compute the header's field number"
         return  1
     }
-    log_info "header FS count: ${fsn}"
+    log_info "FS count: ${fsn} => we have $(($fsn + 1)) fields"
 
     echo $header > $tmp_file2
 
+    local ln=2
     tail -n+2 $tmp_file1 | while read line; do
         local lfs=$(count_fs "${line}" $fs)
         local fsd=$(( $lfs - $fsn ))
-        # ~abs
+        # ~abs()
         local afsd=${fsd#-}
         # repeat extra fs
         local xfs=$(printf "${fs}%.0s" $(eval "echo {1..${afsd}}"))
         if [[  $fsd -lt 0 ]]; then
+            # mostly useless as it would create a (fake?) split and get
+            # removed later, but we keep the code because in the future we
+            # might introduce an explicit remove split option
             fsd=${fsd#-}
             line+=$xfs
-            log_info "'${line}': fixed, +${fsd} FS"
+            log_info "Line ${ln}: fixed, +${fsd} FS: ${line}"
         elif [[  $fsd -gt 0 ]]; then
             # trim at the tail
             line=${line%${xfs}}
-            log_info "'${line}': fixed, -${fsd} FS"
+            log_info "Line ${ln}: fixed, -${fsd} FS: ${line}"
+        fi
+
+        # Consecutive lines bearing the same "N. de transaction" (field #16) are
+        # splits with following lines bearing a "Sous-montant" (field #18) but
+        # empty "Débit; Crédit; Solde" fields (the trailing three, #19-21). To
+        # avoid breaking csv2ofx, these must be filtered out... The kludge is to
+        # skip rows ending with an empty field
+        if [[ "$line" =~ ${fs}[[:blank:]]*$ ]]; then
+            log_info "Line ${ln}: split ignored: ${line}"
+        else
+            printf '%s\n' "$line" >> "$tmp_file2"  # quoted: no word splitting/globbing of the record
+        fi
-        echo $line >> $tmp_file2
+        ((ln++))
     done
+    log_info "Normalize OK"
+
+    # Sort on the caller's field separator: key1 is a date like 'dd.mm.yyyy',
+    # so it is compared in ISO-like order (year, month, day), then by key2.
+    cut -d"${fs}" -f"${fields}" "$tmp_file2" | \
+        sort -t"${fs}" \
+             -k${sort_key1}.7n,${sort_key1}.10n \
+             -k${sort_key1}.4n,${sort_key1}.5n \
+             -k${sort_key1}.1n,${sort_key1}.2n \
+             -k${sort_key2} || {
+        log_error "Can't trim columns (#${fields}) or sort"
+        return  1
+    }
+    log_info "Trim OK"
+}
 
-    # trnxs detailed as "Solde prix prestations" are split with a
-    # "Sous-montant" value, but empty "Débit; Crédit; Solde" columns (the
-    # trailing three). To avoid breaking csv2ofx, these must be filtered
-    # out... The kludge is to skip rows ending with an empty field
-    head $head_opts $tmp_file2 | cut -d$fs -f$fields | \
-        sed -nr "/${fs}\s*$/ !p" || {
-            log_error "can't filter input file"
-            return  1
-        }
+################################################################################
+# main
+################################################################################
+
+while getopts "f:hs:t:1:2:" opt; do
+    case "$opt" in
+        h) usage; exit 0 ;;
+        f) fields=${OPTARG} ;;
+        s) fs=${OPTARG} ;;
+        t) trim_trail=${OPTARG} ;;
+        1) sort_key1=${OPTARG} ;;
+        2) sort_key2=${OPTARG} ;;
+        -) break ;;
+        *) usage; exit 1 ;;
+    esac
+done
+shift $((OPTIND - 1))
+
+
+[[ $CSVTRIM_SELFTEST ]] && {
+    unset CSVTRIM_SELFTEST
+    cmd="$0 -f 1- -s ; -t 0 -1 1 -2 2 -"
+    log_info "Self-testing with command '${cmd}'"
+    result=$(echo "${_test_data}" | exec $cmd) || {
+        log_error "Can't run self-test"
+        exit 1
+    }
+    [[ "$result" == "$_test_results" ]] || {
+        log_error "Self-test KO :-("
+        exit 1
+    }
+    log_info "Self-test OK :-)"
+    exit 0
 }
 
-trim $fs $fields $input_csv $trmtln || {
-    log_error "${input_csv}: trimming failed..."
+input_csv=$1
+
+if [[ "${input_csv}" ]]; then
+    [[ "${input_csv}" == '-' || -r "${input_csv}" ]] || {
+        log_error "${input_csv}: input CSV file unreadable"
+        usage
+        exit 1
+    }
+else
+    log_error "Input CSV file missing."
+    usage
+    exit 1
+fi
+
+log_debug "fields=${fields}"
+
+trim $fs $fields $input_csv $trim_trail || {
+    log_error "${input_csv}: processing failed..."
     exit 1
 }

From 8cdf29897ad9e5871a32b5857279f16886a77471 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Tue, 16 Jul 2024 15:33:21 +0200
Subject: [PATCH 4/9] dev(utilz): use 'comptabilisation' instead of 'value' to
 avoid getting txns predating the period's beginning

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 csv2ofx/mappings/ubs-ch-fr.py | 2 +-
 csv2ofx/utilz/csvtrim         | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/csv2ofx/mappings/ubs-ch-fr.py b/csv2ofx/mappings/ubs-ch-fr.py
index a12cd24..d846838 100644
--- a/csv2ofx/mappings/ubs-ch-fr.py
+++ b/csv2ofx/mappings/ubs-ch-fr.py
@@ -69,7 +69,7 @@ def map_payee(tr):
     "notes": itemgetter("Description 2"),
     # switch day/month (maybe file a bug: always inverted when ambiguous like
     # '01.02.2018')
-    "date": lambda tr: fixdate(tr["Date de valeur"]),
+    "date": lambda tr: fixdate(tr["Date de comptabilisation"]),
     "desc": map_descr,
     "payee": map_payee,
     "check_num": itemgetter("N° de transaction"),
diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim
index 9f85e0d..28776ad 100755
--- a/csv2ofx/utilz/csvtrim
+++ b/csv2ofx/utilz/csvtrim
@@ -16,7 +16,7 @@ __version__='0.1.1'
 myself=$(basename $0)
 
 # defaults
-dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'}
+dfields=${CSVTRIM_FIELDS:-'4,6,9,11,13-16,19-21'}
 dfs=${CSVTRIM_DFS:-';'}
 dtrim_trail=${CSVTRIM_DTRIM_TRAIL-3}
 dsort_key1=${CSVTRIM_DSORT_KEY1-4}
@@ -43,7 +43,8 @@ declare -A cols_in_ubs_fr=(
     [4]='Produit'
     [6]='Monn.'
     [9]='Description'
-    [12]='Date de valeur'
+    [11]='Date de comptabilisation'
+    # [12]='Date de valeur'
     [13]='Description 1'
     [14]='Description 2'
     [15]='Description 3'
@@ -57,7 +58,7 @@ declare -A cols_out_ubs_fr=(
     [1]='Produit'
     [2]='Monn.'
     [3]='Description'
-    [4]='Date de valeur'
+    [4]='Date de comptabilisation'
     [5]='Description 1'
     [6]='Description 2'
     [7]='Description 3'

From fe8ab8d962fdfa63afd9deb3432e6c69129e2e1a Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Tue, 22 Oct 2024 16:20:26 +0200
Subject: [PATCH 5/9] dev(utilz): csvtrim now checks for DOS-style EOL

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 csv2ofx/utilz/csvtrim | 39 ++++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim
index 28776ad..a159e58 100755
--- a/csv2ofx/utilz/csvtrim
+++ b/csv2ofx/utilz/csvtrim
@@ -11,7 +11,7 @@ set -o pipefail
 shopt -s expand_aliases
 
 __author__='Marco "sphakka" Poleggi'
-__version__='0.1.1'
+__version__='0.1.2'
 
 myself=$(basename $0)
 
@@ -80,16 +80,12 @@ else
     alias log_debug=':'
 fi
 
-tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
-    log_error "Can't create tmp file"
-    exit 1
-}
-
-tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
-    log_error "Can't create file"
-    exit 1
-}
-
+for i in {1..3}; do  # scratch files tmp_file1..tmp_file3
+    tmp_path=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || {
+        log_error "Can't create tmp file #${i}"; exit 1
+    }
+    declare "tmp_file${i}=${tmp_path}"  # assigned separately: 'declare x=$(cmd)' hides cmd's exit status
+done
 
 # 6 columns. Use CSVTRIM_DSORT_KEY1=1 CSVTRIM_DSORT_KEY2=2 for self-testing
 _test_data=$(cat <<'EOF'
@@ -161,9 +157,9 @@ EOF
 }
 
 function cleanup() {
-    [[ "${CSVTRIM_DEBUG}" ]] && \
-        log_info "Tmp files kept: tmp_file1='${tmp_file1}', tmp_file2='${tmp_file2}'" \
-            || rm -f $tmp_file1 $tmp_file2
+    [[ "${CSVTRIM_DEBUG}" ]] &&
+        log_debug "Tmp files kept: tmp_file1=${tmp_file1}, tmp_file2=${tmp_file2}, tmp_file3=${tmp_file3}" || \
+            rm -f $tmp_file3 $tmp_file1 $tmp_file2
 }
 
 trap cleanup EXIT SIGINT SIGTERM
@@ -192,7 +188,7 @@ function trim () {
 
     if [ "$trim_trail" ]; then
         [[ "$trim_trail" =~ ^[[:digit:]]+$ ]] || {
-            log_error "${trim_trail}: number of traling lines to trim is not an integer"
+            log_error "${trim_trail}: number of trailing lines to trim is not an integer"
             return  1
         }
         head_opts="-n-${trim_trail}"
@@ -201,7 +197,7 @@ function trim () {
     # escape any separator characters that might appear in quoted fields (yep,
     # that's legal for CSV files) --
     # <https://unix.stackexchange.com/questions/48672/only-remove-commas-embedded-within-quotes-in-a-comma-delimited-file>.
-    # Double quotes are removed as well. Single quotes must stay becasue
+    # Double quotes are removed as well. Single quotes must stay because
     # adjacent financial-style numerals such as 1'234.5;6'789.0 would make a
     # fake quoted string '234.5;6'.
     local quotes="\""
@@ -323,6 +319,10 @@ if [[ "${input_csv}" ]]; then
         usage
         exit 1
     }
+    cat -- "${input_csv}" > "${tmp_file3}" || {  # quoted so paths with blanks work; '-' still means stdin
+        log_error "${input_csv}: can't write to tmp file"
+        exit 1
+    }
 else
     log_error "Input CSV file missing."
     usage
@@ -331,7 +331,12 @@ fi
 
 log_debug "fields=${fields}"
 
-trim $fs $fields $input_csv $trim_trail || {
+grep -q $'\r$' $tmp_file3 && {
+    log_error "${input_csv}: input file has DOS-style EOL. Please, fix it and rerun"
+    exit 1
+}
+
+trim $fs $fields $tmp_file3 $trim_trail || {
     log_error "${input_csv}: processing failed..."
     exit 1
 }

From 2aea5491dba1d580b84fb5f0469ef033ece8d684 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Wed, 9 Apr 2025 15:53:08 +0200
Subject: [PATCH 6/9] dev(test): fixed header in test file for mapping
 ubs-ch-fr

Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com>
---
 data/test/ubs-ch-fr_trimmed.csv |  2 +-
 tests/test_cli.py               | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/data/test/ubs-ch-fr_trimmed.csv b/data/test/ubs-ch-fr_trimmed.csv
index 330c204..7758234 100644
--- a/data/test/ubs-ch-fr_trimmed.csv
+++ b/data/test/ubs-ch-fr_trimmed.csv
@@ -1,4 +1,4 @@
-Produit;Monn.;Description;Date de valeur;Description 1;Description 2;Description 3;N° de transaction;Débit;Crédit;Solde
+Produit;Monn.;Description;Date de comptabilisation;Description 1;Description 2;Description 3;N° de transaction;Débit;Crédit;Solde
 0123 45678901.23A;CHF;Compte personnel UBS;31.03.2019;Solde prix prestations;;;A01234BC01234567;10.00;;11'373.94
 0123 45678901.23A;CHF;Compte personnel UBS;28.02.2019;Virement postal;ASSOCIATION FOO-BAR;BVD DE QUELQUE-PART 1, 1201 GENEVE, CH;3456789ZT1234567;;240.00;11'613.94
 0123 45678901.23A;CHF;Compte personnel UBS;27.04.2019;Ordre e-banking;REMB-CASH;Quuz-baz SàrL, CH - 1203 GENEVE, E-Banking CHF intérieur;9979360TI2115087;200.00;;11'413.94
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 2ac0023..37d968d 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -43,11 +43,11 @@
         "pcmastercard.csv",
         "pcmastercard.ofx",
     ),
-    # (
-    #     # N.B. input file obtained by pre-processing with
-    #     #    bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv
-    #     ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif"
-    # ),
+    (
+        # N.B. input file obtained by pre-processing with
+        #    bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv
+        ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif"
+    ),
     (
         ["-o", "-m ingesp", "-e 20221231"],
         "ingesp.csv",
@@ -139,7 +139,7 @@ def test_sample(opts, in_filename, out_filename, capsys, monkeypatch):
     command = list(itertools.chain(['csv2ofx'], flatten_opts(opts), arguments))
     with pytest.raises(SystemExit) as exc:
         csv2ofx.main.run(command[1:])
-    # Success - exit code 0
+        # Success - exit code 0
     assert exc.value.code == 0
 
     expected = data.joinpath("converted", out_filename).read_text(encoding='utf-8')

From 66b86871505626fca5b946b9ffa64fa13c69b034 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Sun, 27 Jul 2025 11:17:20 -0400
Subject: [PATCH 7/9] Restore indentation.

---
 tests/test_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 37d968d..1b5a452 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -139,7 +139,7 @@ def test_sample(opts, in_filename, out_filename, capsys, monkeypatch):
     command = list(itertools.chain(['csv2ofx'], flatten_opts(opts), arguments))
     with pytest.raises(SystemExit) as exc:
         csv2ofx.main.run(command[1:])
-        # Success - exit code 0
+    # Success - exit code 0
     assert exc.value.code == 0
 
     expected = data.joinpath("converted", out_filename).read_text(encoding='utf-8')

From 23992b00026fe145ddba611f4bb698c78ef72dc5 Mon Sep 17 00:00:00 2001
From: "Jason R. Coombs" <jaraco@jaraco.com>
Date: Sun, 27 Jul 2025 11:20:21 -0400
Subject: [PATCH 8/9] Apply ruff formatting.

---
 tests/test_cli.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 1b5a452..41051f3 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -46,7 +46,9 @@
     (
         # N.B. input file obtained by pre-processing with
         #    bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv
-        ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif"
+        ["-oq", "-m ubs-ch-fr"],
+        "ubs-ch-fr_trimmed.csv",
+        "ubs-ch-fr.qif",
     ),
     (
         ["-o", "-m ingesp", "-e 20221231"],

From 27198fd2e6deceea357b4c785c7540ccb32ea4d3 Mon Sep 17 00:00:00 2001
From: "Marco Emilio \"sphakka\" Poleggi"
 <7766137+sphakka@users.noreply.github.com>
Date: Sun, 16 Nov 2025 19:15:03 +0100
Subject: [PATCH 9/9] Workaround for upstream bug
 https://github.com/python/cpython/issues/141631

---
 csv2ofx/mappings/ubs-ch-fr.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/csv2ofx/mappings/ubs-ch-fr.py b/csv2ofx/mappings/ubs-ch-fr.py
index d846838..3e3ebf4 100644
--- a/csv2ofx/mappings/ubs-ch-fr.py
+++ b/csv2ofx/mappings/ubs-ch-fr.py
@@ -21,12 +21,16 @@
 
 """
 
-# Financial numbers are expressed as "2'045.56" in de/fr/it_CH (utf8 has some
-# glitches, so we go for the default one)
+# Financial numbers are expressed as "2'045.56" in de/fr/it_CH
 import locale
 from operator import itemgetter
 
-locale.setlocale(locale.LC_NUMERIC, 'fr_CH')
+locale.setlocale(locale.LC_NUMERIC, 'fr_CH.utf8')
+# Possible bug workaround <https://github.com/python/cpython/issues/141631>
+locale._override_localeconv = {
+    'mon_thousands_sep': "'",
+    'thousands_sep': "'",
+}
 
 __author__ = 'Marco "sphakka" Poleggi'