From 3000862b8695d2fe2ebe051e452321f0c4e049f8 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Thu, 4 Apr 2024 20:46:47 +0200 Subject: [PATCH 1/9] fix(qif): restored correct handling of 'date_fmt', better CSV normalization with csvtrim Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- csv2ofx/utilz/csvtrim | 132 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 115 insertions(+), 17 deletions(-) diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim index 68ea3a0..ed31033 100755 --- a/csv2ofx/utilz/csvtrim +++ b/csv2ofx/utilz/csvtrim @@ -8,12 +8,39 @@ ################################################################################ [ "$DEBUG" ] && set -x set -o pipefail +shopt -s expand_aliases __author__='Marco "sphakka" Poleggi' +__version__='0.1.1' myself=$(basename $0) + +# defaults dfields='4,6,9,12-16,19-21' -dseparator=';' +dfs=';' +dtrmtln=3 + +# alternatives to use for quoted fields with rogue fs characters +declare -A fs_replacements=( + [;]=',' + [:]=',' + [,]=';' +) + +alias log_error='echo >&2 "[error] ${FUNCNAME}>"' +alias log_info='echo >&2 "[info] ${FUNCNAME}>"' +alias log_warn='echo >&2 "[warn] ${FUNCNAME}>"' + +tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { + log_error "Can't create tmp file" + exit 1 +} + +tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { + log_error "Can't create file" + exit 1 +} + ################################################################################ @@ -21,13 +48,13 @@ usage() { echo >&2 " Usage: - $myself CSV_FILE [FIELDS [SEPARATOR]] + $myself CSV_FILE [FIELDS [FS]] where - CSV_FILE: path to an existing file or '-' for stdin - FIELDS: cut-style list of fields to keep. Default: '$dfields' - SEPARATOR: a single (escaped) character. Default: '$dseparator' + CSV_FILE path to an existing file or '-' for stdin + FIELDS cut-style list of fields to keep. Default: '$dfields' + FS a single (escaped) character as field separator. Default: '$dfs' (default values are for exports from UBS CH (DE/FR/IT)) @@ -39,41 +66,112 @@ e.g. exit 1 } -trap '[ $? -ne 0 ] && usage' EXIT +function cleanup() { + [ $? -ne 0 ] && usage + [[ "$DEBUG" ]] && log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \ + || rm -f $tmp_file1 $tmp_file2 +} + + +# trap '[ $? -ne 0 ] && usage' EXIT +trap cleanup EXIT SIGINT SIGTERM input_csv=${1:?'arg #1 missing: input CSV file'} fields=${2:-$dfields} -separator=${3:-$dseparator} +fs=${3:-$dfs} +trmtln=${3:-$dtrmtln} -function _trim () { - local dlmtrc=${1:?'arg #1 missing: delimiter character'} +# Count the number of field separator in a string +function count_fs() { + local string=${1:?'arg #1 missing: input string'} + local fs=${2:?'arg #2 missing: field separator'} + + fsn=$(echo "${string}" | sed -nr "s/${fs}/\n/g p" | wc -l) || { + log_error "sed filter failed" + return 1 + } + echo $((fsn - 1)) +} + + +function trim () { + local fs=${1:?'arg #1 missing: field delimiter character'} local fields=${2:?'arg #2 missing: cut-style fields to keep'} local incsvf=${3:?'arg #3 missing: input CSV file'} - local trmtln=${4:-'3'} # number of trailing lines to trim + local trmtln=${4:?'arg #4 missing: number of trailing lines to trim'} local head_opts= if [ "$trmtln" ]; then [[ "$trmtln" =~ ^[[:digit:]]+$ ]] || { - echo >&2 "[error] ${trmtln}: number of traling lines to trim is not an integer" + log_error "${trmtln}: number of traling lines to trim is not an integer" return 1 } head_opts="-n-${trmtln}" fi + # escape any separator characters that might appear in quoted fields (yep, + # that's legal for CSV files) -- + # . + # Quotes are remove as well. + local quotes="\"'" + local fs_repl=${fs_replacements[${fs}]} || { + log_error "[bug] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array" + return 1 + } + sed -r ":a; + s/([${quotes}])([^${quotes}${fs}]*)${fs}(.*?)([${quotes}])/\1\2${fs_repl}\3\4/; + ta; + s/[${quotes}]//g" $incsvf > $tmp_file1 || { + log_error "can't treat quoted field(s)" + return 1 + } + + # normalize field number where possible -- remove trailing fs, append + # fs. The correct number of fields is inferred by the first (supposedly + # the header) line. + local header=$(head -n1 $tmp_file1 | sed -r "s/${fs}*$//g") + log_info "header (possibly fixed): ${header}" + fsn=$(count_fs "${header}" $fs) || { + log_error "can't compute the header's field number" + return 1 + } + log_info "header FS count: ${fsn}" + + echo $header > $tmp_file2 + + tail -n+2 $tmp_file1 | while read line; do + local lfs=$(count_fs "${line}" $fs) + local fsd=$(( $lfs - $fsn )) + # ~abs + local afsd=${fsd#-} + # repeat extra fs + local xfs=$(printf "${fs}%.0s" $(eval "echo {1..${afsd}}")) + if [[ $fsd -lt 0 ]]; then + fsd=${fsd#-} + line+=$xfs + log_info "'${line}': fixed, +${fsd} FS" + elif [[ $fsd -gt 0 ]]; then + # trim at the tail + line=${line%${xfs}} + log_info "'${line}': fixed, -${fsd} FS" + fi + echo $line >> $tmp_file2 + done + # trnxs detailed as "Solde prix prestations" are split with a # "Sous-montant" value, but empty "Débit; Crédit; Solde" columns (the # trailing three). To avoid breaking csv2ofx, these must be filtered - # out... The kludge is to skip rows ending with 3 consecutive delimiter chars - head $head_opts $incsvf | cut -d$dlmtrc -f$fields | \ - sed -nr "/${dlmtrc}${dlmtrc}${dlmtrc}\s*$/ !p" || { - echo >&2 "[error] ${incsvf}: can't filter input file" + # out... The kludge is to skip rows ending with an empty field + head $head_opts $tmp_file2 | cut -d$fs -f$fields | \ + sed -nr "/${fs}\s*$/ !p" || { + log_error "can't filter input file" return 1 } } -_trim $separator $fields $input_csv || { - echo >&2 "[error] ${input_csv}: trimming failed..." +trim $fs $fields $input_csv $trmtln || { + log_error "${input_csv}: trimming failed..." exit 1 } From 7432ca83076b85cc8556a04af987b7ae3219bf29 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Fri, 5 Apr 2024 12:41:21 +0200 Subject: [PATCH 2/9] dev(utilz): self-testing capabilities for csvtrim Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- csv2ofx/utilz/csvtrim | 48 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim index ed31033..6d42c5a 100755 --- a/csv2ofx/utilz/csvtrim +++ b/csv2ofx/utilz/csvtrim @@ -6,7 +6,7 @@ # # TO-DO: integrate in csv2ofx? ################################################################################ -[ "$DEBUG" ] && set -x +[[ "$CSVTRIM_DEBUG" ]] && set -x set -o pipefail shopt -s expand_aliases @@ -16,9 +16,9 @@ __version__='0.1.1' myself=$(basename $0) # defaults -dfields='4,6,9,12-16,19-21' -dfs=';' -dtrmtln=3 +dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'} +dfs=${CSVTRIM_DFS:-';'} +dtrmtln=${CSVTRIM_DTRMTLN-3} # alternatives to use for quoted fields with rogue fs characters declare -A fs_replacements=( @@ -42,19 +42,46 @@ tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { } +_test_data=$(cat <<'EOF' +header;with;four;fields;; +too;many;fields;here;;; +too;'many; also';there;;; +"too,few; fields";here +line;is;a;split +split;1 +split;2;;; +this;line;is;OK +and;this too;is;right +EOF +) + +[[ $CSVTRIM_SELFTEST ]] && { + unset CSVTRIM_SELFTEST + cmd="$0 - 1- ; 0" + log_info "Self-testing with command '${cmd}'" + echo "${_test_data}" | exec $cmd && { + log_info "Self-test OK :-)" + exit 0 + } + log_info "Self-test KO :-(" + exit 1 +} + + ################################################################################ usage() { echo >&2 " Usage: - $myself CSV_FILE [FIELDS [FS]] + $myself CSV_FILE [FIELDS [FS [TRIM_NLINES]]] where - CSV_FILE path to an existing file or '-' for stdin - FIELDS cut-style list of fields to keep. Default: '$dfields' - FS a single (escaped) character as field separator. Default: '$dfs' + CSV_FILE path to an existing file or '-' for stdin + FIELDS cut-style list of fields to keep. Default: '$dfields' + FS a single (escaped) character as field separator. Default: '$dfs' + TRIM_NLINES discard N trailing lines (default values are for exports from UBS CH (DE/FR/IT)) @@ -68,7 +95,8 @@ e.g. function cleanup() { [ $? -ne 0 ] && usage - [[ "$DEBUG" ]] && log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \ + [[ "$DEBUG" ]] && \ + log_info "tmp file kept: tmp_file1=$tmp_file1, tmp_file2=$tmp_file2" \ || rm -f $tmp_file1 $tmp_file2 } @@ -79,7 +107,7 @@ trap cleanup EXIT SIGINT SIGTERM input_csv=${1:?'arg #1 missing: input CSV file'} fields=${2:-$dfields} fs=${3:-$dfs} -trmtln=${3:-$dtrmtln} +trmtln=${4:-$dtrmtln} # Count the number of field separator in a string From bdaecd4b79dc159f6417bec0fa913f6ddc198496 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Tue, 16 Jul 2024 14:21:22 +0200 Subject: [PATCH 3/9] dev(utilz): csvtrim reworked around getopt with fixes for single quotes. +output sorting Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- csv2ofx/utilz/csvtrim | 277 +++++++++++++++++++++++++++++++----------- 1 file changed, 204 insertions(+), 73 deletions(-) diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim index 6d42c5a..9f85e0d 100755 --- a/csv2ofx/utilz/csvtrim +++ b/csv2ofx/utilz/csvtrim @@ -6,7 +6,7 @@ # # TO-DO: integrate in csv2ofx? ################################################################################ -[[ "$CSVTRIM_DEBUG" ]] && set -x +[[ "$CSVTRIM_TRACE" ]] && set -x set -o pipefail shopt -s expand_aliases @@ -18,7 +18,17 @@ myself=$(basename $0) # defaults dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'} dfs=${CSVTRIM_DFS:-';'} -dtrmtln=${CSVTRIM_DTRMTLN-3} +dtrim_trail=${CSVTRIM_DTRIM_TRAIL-3} +dsort_key1=${CSVTRIM_DSORT_KEY1-4} +dsort_key2=${CSVTRIM_DSORT_KEY2-8} + +# option vars +fields=${dfields} +fs=${dfs} +trim_trail=${dtrim_trail} +sort_key1=${dsort_key1} +sort_key2=${dsort_key2} + # alternatives to use for quoted fields with rogue fs characters declare -A fs_replacements=( @@ -27,9 +37,47 @@ declare -A fs_replacements=( [,]=';' ) +# CSV columns' index-label crossref for UBS-FR -- just informational for +# now. Sorting is indeed made on numerical keys. +declare -A cols_in_ubs_fr=( + [4]='Produit' + [6]='Monn.' + [9]='Description' + [12]='Date de valeur' + [13]='Description 1' + [14]='Description 2' + [15]='Description 3' + [16]='N° de transaction' + [19]='Débit' + [20]='Crédit' + [21]='Solde' +) + +declare -A cols_out_ubs_fr=( + [1]='Produit' + [2]='Monn.' + [3]='Description' + [4]='Date de valeur' + [5]='Description 1' + [6]='Description 2' + [7]='Description 3' + [8]='N° de transaction' + [9]='Débit' + [10]='Crédit' + [11]='Solde' +) +# Primary and secondary ouput sorting keys indices +sort_key1=$dsort_key1 +sort_key2=$dsort_key2 + alias log_error='echo >&2 "[error] ${FUNCNAME}>"' alias log_info='echo >&2 "[info] ${FUNCNAME}>"' alias log_warn='echo >&2 "[warn] ${FUNCNAME}>"' +if [[ $CSVTRIM_DEBUG ]]; then + alias log_debug='echo >&2 "[debug] ${FUNCNAME}>"' +else + alias log_debug=':' +fi tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { log_error "Can't create tmp file" @@ -42,73 +90,83 @@ tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { } +# 6 columns. Use CSVTRIM_DSORT_KEY1=1 CSVTRIM_DSORT_KEY2=2 for self-testing _test_data=$(cat <<'EOF' -header;with;four;fields;; -too;many;fields;here;;; -too;'many; also';there;;; -"too,few; fields";here -line;is;a;split -split;1 -split;2;;; -this;line;is;OK -and;this too;is;right +date;trxnid;header;with;six;fields;; +02.03.2024;txn-07;will stay;with;3 fields;less;;; +01.02.2024;txn-01;will;"stay; also";with 1;field less; +01.02.2024;txn-03;"too,few; fields";here +01.02.2024;txn-02;this split;will stay;;1'234.5 +01.02.2024;txn-02;comment is removed;;; +01.02.2024;txn-02;split-1 is removed +01.02.2024;txn-02;split-2 is removed;;; +04.03.2024;txn-05;this line;is;really;OK +07.01.2024;txn-04;this too short by one;removed; as a split +08.02.2024;txn-06;thousand separator; is not a quotation mark;1'234.5;6'789.0 EOF ) -[[ $CSVTRIM_SELFTEST ]] && { - unset CSVTRIM_SELFTEST - cmd="$0 - 1- ; 0" - log_info "Self-testing with command '${cmd}'" - echo "${_test_data}" | exec $cmd && { - log_info "Self-test OK :-)" - exit 0 - } - log_info "Self-test KO :-(" - exit 1 -} +_test_results=$(cat <<'EOF' +date;trxnid;header;with;six;fields +01.02.2024;txn-01;will;stay, also;with 1;field less +01.02.2024;txn-02;this split;will stay;;1'234.5 +08.02.2024;txn-06;thousand separator; is not a quotation mark;1'234.5;6'789.0 +02.03.2024;txn-07;will stay;with;3 fields;less +04.03.2024;txn-05;this line;is;really;OK +EOF +) ################################################################################ usage() { - echo >&2 " + cat >&2 <. - # Quotes are remove as well. - local quotes="\"'" + # Double quotes are removed as well. Single quotes must stay becasue + # adjacent financial-style numerals such as 1'234.5;6'789.0 would make a + # fake quoted string '234.5;6'. + local quotes="\"" local fs_repl=${fs_replacements[${fs}]} || { - log_error "[bug] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array" + log_error "[BUG] no replacement configured for fs '${fs}'. Please correct the 'fs_replacements' array" return 1 } - sed -r ":a; + head $head_opts $incsvf | \ + sed -r ":a; s/([${quotes}])([^${quotes}${fs}]*)${fs}(.*?)([${quotes}])/\1\2${fs_repl}\3\4/; ta; - s/[${quotes}]//g" $incsvf > $tmp_file1 || { - log_error "can't treat quoted field(s)" + s/[${quotes}]//g" > $tmp_file1 || { + log_error "Can't treat quoted field(s)" return 1 } @@ -160,46 +221,116 @@ function trim () { # fs. The correct number of fields is inferred by the first (supposedly # the header) line. local header=$(head -n1 $tmp_file1 | sed -r "s/${fs}*$//g") - log_info "header (possibly fixed): ${header}" + log_info "Header (possibly fixed): '${header}'" fsn=$(count_fs "${header}" $fs) || { log_error "can't compute the header's field number" return 1 } - log_info "header FS count: ${fsn}" + log_info "FS count: ${fsn} => we have $(($fsn + 1)) fields" echo $header > $tmp_file2 + local ln=2 tail -n+2 $tmp_file1 | while read line; do local lfs=$(count_fs "${line}" $fs) local fsd=$(( $lfs - $fsn )) - # ~abs + # ~abs() local afsd=${fsd#-} # repeat extra fs local xfs=$(printf "${fs}%.0s" $(eval "echo {1..${afsd}}")) if [[ $fsd -lt 0 ]]; then + # mostly useless as it would create a (fake?) split and get + # removed lateer, but we keep the code because in the future we + # might introduce an explicit remove split option fsd=${fsd#-} line+=$xfs - log_info "'${line}': fixed, +${fsd} FS" + log_info "Line ${ln}: fixed, +${fsd} FS: ${line}" elif [[ $fsd -gt 0 ]]; then # trim at the tail line=${line%${xfs}} - log_info "'${line}': fixed, -${fsd} FS" + log_info "Line ${ln}: fixed, -${fsd} FS: ${line}" + fi + + # Consecutive lines bearing the same "N. de transaction" (field #16) are + # splits with following lines bearing a "Sous-montant" (field #18) but + # empty "Débit; Crédit; Solde" fields (the trailing three, #19-21). To + # avoid breaking csv2ofx, these must be filtered out... The kludge is to + # skip rows ending with an empty field + if [[ "$line" =~ ${fs}[[:blank:]]*$ ]]; then + log_info "Line ${ln}: split ignored: ${line}" + else + echo $line >> $tmp_file2 fi - echo $line >> $tmp_file2 + ((ln++)) done + log_info "Normalize OK" + + # Sort: key1 is a date like 'dd.mm.yyyy'. To sort correctly, we use the + # reverse order (ISO-like) year, month, day. + cut -d${fs} -f${fields} $tmp_file2 | \ + sort -t';' \ + -k${sort_key1}.7n,${sort_key1}.10n \ + -k${sort_key1}.4n,${sort_key1}.5n \ + -k${sort_key1}.1n,${sort_key1}.2n \ + -k${sort_key2} || { + log_error "Can't trim columns (#${fields}) or sort" + return 1 + } + log_info "Trim OK" +} - # trnxs detailed as "Solde prix prestations" are split with a - # "Sous-montant" value, but empty "Débit; Crédit; Solde" columns (the - # trailing three). To avoid breaking csv2ofx, these must be filtered - # out... The kludge is to skip rows ending with an empty field - head $head_opts $tmp_file2 | cut -d$fs -f$fields | \ - sed -nr "/${fs}\s*$/ !p" || { - log_error "can't filter input file" - return 1 - } +################################################################################ +# main +################################################################################ + +while getopts "f:hs:t:1:2:" opt; do + case "$opt" in + h) usage; exit 0 ;; + f) fields=${OPTARG} ;; + s) fs=${OPTARG} ;; + t) trim_trail=${OPTARG} ;; + 1) sort_key1=${OPTARG} ;; + 2) sort_key2=${OPTARG} ;; + -) break ;; + *) usage; exit 1 ;; + esac +done +shift $((OPTIND - 1)) + + +[[ $CSVTRIM_SELFTEST ]] && { + unset CSVTRIM_SELFTEST + cmd="$0 -f 1- -s ; -t 0 -1 1 -2 2 -" + log_info "Self-testing with command '${cmd}'" + result=$(echo "${_test_data}" | exec $cmd) || { + log_error "Can't run self-test" + exit 1 + } + [[ "$result" == "$_test_results" ]] || { + log_error "Self-test KO :-(" + exit 1 + } + log_info "Self-test OK :-)" + exit 0 } -trim $fs $fields $input_csv $trmtln || { - log_error "${input_csv}: trimming failed..." +input_csv=$1 + +if [[ "${input_csv}" ]]; then + [[ "${input_csv}" == '-' || -r "${input_csv}" ]] || { + log_error "${input_csv}: input CSV file unreadable" + usage + exit 1 + } +else + log_error "Input CSV file missing." + usage + exit 1 +fi + +log_debug "fields=${fields}" + +trim $fs $fields $input_csv $trim_trail || { + log_error "${input_csv}: processing failed..." exit 1 } From 8cdf29897ad9e5871a32b5857279f16886a77471 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Tue, 16 Jul 2024 15:33:21 +0200 Subject: [PATCH 4/9] dev(utilz): use 'comptabilisation' instead of 'value' to avoid getting txns predating the period's beginning Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- csv2ofx/mappings/ubs-ch-fr.py | 2 +- csv2ofx/utilz/csvtrim | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/csv2ofx/mappings/ubs-ch-fr.py b/csv2ofx/mappings/ubs-ch-fr.py index a12cd24..d846838 100644 --- a/csv2ofx/mappings/ubs-ch-fr.py +++ b/csv2ofx/mappings/ubs-ch-fr.py @@ -69,7 +69,7 @@ def map_payee(tr): "notes": itemgetter("Description 2"), # switch day/month (maybe file a bug: always inverted when ambiguous like # '01.02.2018') - "date": lambda tr: fixdate(tr["Date de valeur"]), + "date": lambda tr: fixdate(tr["Date de comptabilisation"]), "desc": map_descr, "payee": map_payee, "check_num": itemgetter("N° de transaction"), diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim index 9f85e0d..28776ad 100755 --- a/csv2ofx/utilz/csvtrim +++ b/csv2ofx/utilz/csvtrim @@ -16,7 +16,7 @@ __version__='0.1.1' myself=$(basename $0) # defaults -dfields=${CSVTRIM_FIELDS:-'4,6,9,12-16,19-21'} +dfields=${CSVTRIM_FIELDS:-'4,6,9,11,13-16,19-21'} dfs=${CSVTRIM_DFS:-';'} dtrim_trail=${CSVTRIM_DTRIM_TRAIL-3} dsort_key1=${CSVTRIM_DSORT_KEY1-4} @@ -43,7 +43,8 @@ declare -A cols_in_ubs_fr=( [4]='Produit' [6]='Monn.' [9]='Description' - [12]='Date de valeur' + [11]='Date de comptabilisation' + # [12]='Date de valeur' [13]='Description 1' [14]='Description 2' [15]='Description 3' @@ -57,7 +58,7 @@ declare -A cols_out_ubs_fr=( [1]='Produit' [2]='Monn.' [3]='Description' - [4]='Date de valeur' + [4]='Date de comptabilisation' [5]='Description 1' [6]='Description 2' [7]='Description 3' From fe8ab8d962fdfa63afd9deb3432e6c69129e2e1a Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:20:26 +0200 Subject: [PATCH 5/9] dev(utilz): csvtrim now checks for DOS-style EOL Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- csv2ofx/utilz/csvtrim | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/csv2ofx/utilz/csvtrim b/csv2ofx/utilz/csvtrim index 28776ad..a159e58 100755 --- a/csv2ofx/utilz/csvtrim +++ b/csv2ofx/utilz/csvtrim @@ -11,7 +11,7 @@ set -o pipefail shopt -s expand_aliases __author__='Marco "sphakka" Poleggi' -__version__='0.1.1' +__version__='0.1.2' myself=$(basename $0) @@ -80,16 +80,12 @@ else alias log_debug=':' fi -tmp_file1=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { - log_error "Can't create tmp file" - exit 1 -} - -tmp_file2=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { - log_error "Can't create file" - exit 1 -} - +for i in {1..3}; do + declare tmp_file${i}=$(mktemp --tmpdir=/tmp -t "${myself}-XXXXXX") || { + log_error "Can't create tmp file #${i}" + exit 1 + } +done # 6 columns. Use CSVTRIM_DSORT_KEY1=1 CSVTRIM_DSORT_KEY2=2 for self-testing _test_data=$(cat <<'EOF' @@ -161,9 +157,9 @@ EOF } function cleanup() { - [[ "${CSVTRIM_DEBUG}" ]] && \ - log_info "Tmp files kept: tmp_file1='${tmp_file1}', tmp_file2='${tmp_file2}'" \ - || rm -f $tmp_file1 $tmp_file2 + [[ "${CSVTRIM_DEBUG}" ]] && + log_debug "Tmp files kept: tmp_file1=${tmp_file1}, tmp_file2=${tmp_file2}, tmp_file3=${tmp_file3}" || \ + rm -f $tmp_file3 $tmp_file1 $tmp_file2 } trap cleanup EXIT SIGINT SIGTERM @@ -192,7 +188,7 @@ function trim () { if [ "$trim_trail" ]; then [[ "$trim_trail" =~ ^[[:digit:]]+$ ]] || { - log_error "${trim_trail}: number of traling lines to trim is not an integer" + log_error "${trim_trail}: number of trailing lines to trim is not an integer" return 1 } head_opts="-n-${trim_trail}" @@ -201,7 +197,7 @@ function trim () { # escape any separator characters that might appear in quoted fields (yep, # that's legal for CSV files) -- # . - # Double quotes are removed as well. Single quotes must stay becasue + # Double quotes are removed as well. Single quotes must stay because # adjacent financial-style numerals such as 1'234.5;6'789.0 would make a # fake quoted string '234.5;6'. local quotes="\"" @@ -323,6 +319,10 @@ if [[ "${input_csv}" ]]; then usage exit 1 } + cat $input_csv > $tmp_file3 || { + log_error "${input_csv}: can't write to tmp file" + exit 1 + } else log_error "Input CSV file missing." usage @@ -331,7 +331,12 @@ fi log_debug "fields=${fields}" -trim $fs $fields $input_csv $trim_trail || { +grep -q $'\r$' $tmp_file3 && { + log_error "${input_csv}: input file has DOS-style EOL. Please, fix it and rerun" + exit 1 +} + +trim $fs $fields $tmp_file3 $trim_trail || { log_error "${input_csv}: processing failed..." exit 1 } From 2aea5491dba1d580b84fb5f0469ef033ece8d684 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Wed, 9 Apr 2025 15:53:08 +0200 Subject: [PATCH 6/9] dev(test): fixed header in test file for mapping ubs-ch-fr Signed-off-by: Marco Emilio "sphakka" Poleggi <7766137+sphakka@users.noreply.github.com> --- data/test/ubs-ch-fr_trimmed.csv | 2 +- tests/test_cli.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/data/test/ubs-ch-fr_trimmed.csv b/data/test/ubs-ch-fr_trimmed.csv index 330c204..7758234 100644 --- a/data/test/ubs-ch-fr_trimmed.csv +++ b/data/test/ubs-ch-fr_trimmed.csv @@ -1,4 +1,4 @@ -Produit;Monn.;Description;Date de valeur;Description 1;Description 2;Description 3;N° de transaction;Débit;Crédit;Solde +Produit;Monn.;Description;Date de comptabilisation;Description 1;Description 2;Description 3;N° de transaction;Débit;Crédit;Solde 0123 45678901.23A;CHF;Compte personnel UBS;31.03.2019;Solde prix prestations;;;A01234BC01234567;10.00;;11'373.94 0123 45678901.23A;CHF;Compte personnel UBS;28.02.2019;Virement postal;ASSOCIATION FOO-BAR;BVD DE QUELQUE-PART 1, 1201 GENEVE, CH;3456789ZT1234567;;240.00;11'613.94 0123 45678901.23A;CHF;Compte personnel UBS;27.04.2019;Ordre e-banking;REMB-CASH;Quuz-baz SàrL, CH - 1203 GENEVE, E-Banking CHF intérieur;9979360TI2115087;200.00;;11'413.94 diff --git a/tests/test_cli.py b/tests/test_cli.py index 2ac0023..37d968d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -43,11 +43,11 @@ "pcmastercard.csv", "pcmastercard.ofx", ), - # ( - # # N.B. input file obtained by pre-processing with - # # bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv - # ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif" - # ), + ( + # N.B. input file obtained by pre-processing with + # bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv + ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif" + ), ( ["-o", "-m ingesp", "-e 20221231"], "ingesp.csv", @@ -139,7 +139,7 @@ def test_sample(opts, in_filename, out_filename, capsys, monkeypatch): command = list(itertools.chain(['csv2ofx'], flatten_opts(opts), arguments)) with pytest.raises(SystemExit) as exc: csv2ofx.main.run(command[1:]) - # Success - exit code 0 + # Success - exit code 0 assert exc.value.code == 0 expected = data.joinpath("converted", out_filename).read_text(encoding='utf-8') From 66b86871505626fca5b946b9ffa64fa13c69b034 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Jul 2025 11:17:20 -0400 Subject: [PATCH 7/9] Restore indentation. --- tests/test_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 37d968d..1b5a452 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -139,7 +139,7 @@ def test_sample(opts, in_filename, out_filename, capsys, monkeypatch): command = list(itertools.chain(['csv2ofx'], flatten_opts(opts), arguments)) with pytest.raises(SystemExit) as exc: csv2ofx.main.run(command[1:]) - # Success - exit code 0 + # Success - exit code 0 assert exc.value.code == 0 expected = data.joinpath("converted", out_filename).read_text(encoding='utf-8') From 23992b00026fe145ddba611f4bb698c78ef72dc5 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 27 Jul 2025 11:20:21 -0400 Subject: [PATCH 8/9] Apply ruff formatting. --- tests/test_cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 1b5a452..41051f3 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -46,7 +46,9 @@ ( # N.B. input file obtained by pre-processing with # bin/csvtrim ubs-ch-fr.csv > ubs-ch-fr_trimmed.csv - ["-oq", "-m ubs-ch-fr"], "ubs-ch-fr_trimmed.csv", "ubs-ch-fr.qif" + ["-oq", "-m ubs-ch-fr"], + "ubs-ch-fr_trimmed.csv", + "ubs-ch-fr.qif", ), ( ["-o", "-m ingesp", "-e 20221231"], From 27198fd2e6deceea357b4c785c7540ccb32ea4d3 Mon Sep 17 00:00:00 2001 From: "Marco Emilio \"sphakka\" Poleggi" <7766137+sphakka@users.noreply.github.com> Date: Sun, 16 Nov 2025 19:15:03 +0100 Subject: [PATCH 9/9] Workaround for upstream bug https://github.com/python/cpython/issues/141631 --- csv2ofx/mappings/ubs-ch-fr.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/csv2ofx/mappings/ubs-ch-fr.py b/csv2ofx/mappings/ubs-ch-fr.py index d846838..3e3ebf4 100644 --- a/csv2ofx/mappings/ubs-ch-fr.py +++ b/csv2ofx/mappings/ubs-ch-fr.py @@ -21,12 +21,16 @@ """ -# Financial numbers are expressed as "2'045.56" in de/fr/it_CH (utf8 has some -# glitches, so we go for the default one) +# Financial numbers are expressed as "2'045.56" in de/fr/it_CH import locale from operator import itemgetter -locale.setlocale(locale.LC_NUMERIC, 'fr_CH') +locale.setlocale(locale.LC_NUMERIC, 'fr_CH.utf8') +# Possible bug workaround +locale._override_localeconv = { + 'mon_thousands_sep': "'", + 'thousands_sep': "'" +} __author__ = 'Marco "sphakka" Poleggi'