diff --git a/bin/parseRefStrings.pl b/bin/parseRefStrings.pl
index 3cfed23..2495014 100755
--- a/bin/parseRefStrings.pl
+++ b/bin/parseRefStrings.pl
@@ -7,8 +7,12 @@
 # Min-Yen Kan (Thu Feb 28 14:10:28 SGT 2008)
 #  Derived from citeExtract.pl
 #
+# Matthias Bösinger (29.03.2016)
+# -> all changes marked with: MB1
+
 use strict;
 use FindBin;
+use Getopt::Long;
 use lib "$FindBin::Bin/../lib";
 
 use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
@@ -16,16 +20,40 @@
 
 use ParsCit::Controller;
 use CSXUtil::SafeText qw(cleanAll cleanXML);
+use ParsCit::ConfigLang;
+
+###
+# set standard encoding to UTF-8 
+# MB1
+###
+binmode STDIN, ":encoding(UTF-8)";
+binmode STDOUT, ":encoding(UTF-8)";
+
+### Get additional parameter (language parameter (english as default), split parameter, keep temp files parameter) - MB1
+my $lang = "en";
+my $split = '';
+my $keep = '';
+if (!GetOptions("lang=s" => \$lang, "split" => \$split, "keep" => \$keep)) {
+	print "Usage: $0 textfile outfile [-lang=en|de] [-split] [-keep]\n";
+    exit;
+}
+# initialize language config
+if (!ParsCit::ConfigLang::Init($lang)) {
+	print "Usage: $0 textfile outfile [-lang=en|de] [-split] [-keep]\n";
+    exit;
+}
+### End (additional parameter) - MB1
 
 my $textFile = $ARGV[0];
 my $outFile = $ARGV[1];
 
 if (!defined $textFile) {
-    print "Usage: $0 textfile [outfile]\n";
+    print "Usage: $0 textfile outfile [-lang=en|de] [-split] [-keep]\n";	# Updated - MB1
     exit;
 }
 
-open (IF, $textFile) || die "Couldn't open text file \"textFile\"!";
+# open (IF, $textFile) || die "Couldn't open text file \"textFile\"!";
+open (IF, "<:encoding(UTF-8)", $textFile) || die "Couldn't open text file \"$textFile\": $!"; 	# read input as UTF-8; report the real file name and OS error - MB1
 my $normalizedCiteText = "";
 my $line = 0;
 while (<IF>) {
@@ -42,16 +70,16 @@
 }
 
 our $msg = "";
-my $tmpFile = ParsCit::Tr2crfpp::PrepData(\$normalizedCiteText, $textFile);
-my $outFile = $tmpFile."_dec";
+my $tmpFile = ParsCit::Tr2crfpp::PrepData(\$normalizedCiteText, $textFile, $split); 	# Additional parameter 'split' - MB1
+my $outTmpFile = $tmpFile."_dec"; 	# Changed name from '$outFile'. Otherwise name conflict with 'outFile' from $ARGV[1]; - MB1
 my @validCitations = ();
 
 my $xml = "";
 $xml .= "<algorithm name=\"$ParsCit::Config::algorithmName\" version=\"$ParsCit::Config::algorithmVersion\">\n";
 $xml .= "<citationList>\n";
-if (ParsCit::Tr2crfpp::Decode($tmpFile, $outFile)) {
+if (ParsCit::Tr2crfpp::Decode($tmpFile, $outTmpFile)) {
     my ($rRawXML, $rCiteInfo, $tstatus, $tmsg) =
-	ParsCit::PostProcess::ReadAndNormalize($outFile);
+	ParsCit::PostProcess::ReadAndNormalize($outTmpFile);
     if ($tstatus <= 0) {
 	return ($tstatus, $msg, undef, undef);
     }
@@ -60,15 +88,16 @@
 	my %citeInfo = %{$citeInfo[$i]};
 	$xml .= "<citation>\n";
 	foreach my $key (keys %citeInfo) {
-	    if ($key eq "authors" || $key eq "editors") {
-		my $singular = $key;
-		chop $singular;
-		$xml .= "<$key>\n";
-		foreach my $person (@{$citeInfo{$key}}) {
-			cleanAll(\$person);
-		    $xml .= "<$singular>$person</$singular>\n";
-		}
-		$xml .= "</$key>\n";
+	    if ($key eq "authors" || $key eq "editors") 
+	    {
+			my $singular = $key;
+			chop $singular;
+			$xml .= "<$key>\n";
+			foreach my $person (@{$citeInfo{$key}}) {
+				cleanAll(\$person);
+				$xml .= "<$singular>$person</$singular>\n";
+			}
+			$xml .= "</$key>\n";
 	    } 
 		elsif ($key eq "volume") 
 		{
@@ -96,7 +125,24 @@
     $xml .= "</citationList>\n</algorithm>\n";
 }
 
-unlink($tmpFile);
-unlink($outFile);
+###
+# tmp-files are kept if parameter has been set.
+# MB1
+###
+unless ($keep) { 
+	unlink($tmpFile); 
+	unlink($outTmpFile);
+}
+
+###
+# If outFile has been passed as parameter the result .xml will be printed to this file.
+# Otherwise (or if that file cannot be opened) the result .xml will be printed to standard out.
+# MB1
+###
+if (defined $outFile && open(OUT, ">:encoding(UTF-8)", $outFile)) {
+	print OUT $xml;
+	close(OUT) || warn "Couldn't write out file \"$outFile\": $!";	# buffered write errors surface at close
+} else {
+	print $xml;
+}
 
-print $xml;
diff --git a/bin/prepData.pl b/bin/prepData.pl
new file mode 100755
index 0000000..8fd82cd
--- /dev/null
+++ b/bin/prepData.pl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -CSD
+#
+# Script to transform reference strings to crf++ compatible data.
+# By doing so, CRF++ can be used manually with the transformed reference data.
+#
+# Derived from 'parseRefStrings.pl'
+#
+# written by Matthias Bösinger (29.03.2016)
+
+use strict;
+use FindBin;
+use lib "$FindBin::Bin/../lib";
+
+use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/5.10.0";
+use lib "/home/wing.nus/tools/languages/programming/perl-5.10.0/lib/site_perl/5.10.0";
+
+use ParsCit::Controller;
+use CSXUtil::SafeText qw(cleanAll cleanXML);
+
+my $textFile = $ARGV[0];
+my $outFile = $ARGV[1];
+
+if (!defined $textFile || !defined $outFile) {
+    print "Usage: $0 textfile outfile\n";
+    exit;
+}
+
+open (IF, "<", $textFile) || die "Couldn't open text file \"$textFile\": $!";
+my $normalizedCiteText = "";
+my $line = 0;
+while (<IF>) {
+  chomp;	# strip only the line terminator; chop would eat the last character of an unterminated final line
+  # Tr2cfpp needs an enclosing tag for initial class seed.
+  $normalizedCiteText .= "<title> " . $_ . " </title>\n";
+  $line++;
+}
+close (IF);
+
+if ($line == 0) {
+  # Stop - nothing left to do.
+  exit();
+}
+
+our $msg = "";
+my $tmpFile = ParsCit::Tr2crfpp::PrepData(\$normalizedCiteText, $textFile);
+
+open (TF, "<", $tmpFile) || die "Couldn't open tmp file \"$tmpFile\": $!";
+open (OF, ">", $outFile) || die "Couldn't open out file \"$outFile\": $!";
+while (<TF>) {
+	chomp;	# normalize the line ending without losing data on an unterminated last line
+	print OF $_ . "\n";
+}
+
+close(TF);
+close(OF) || die "Couldn't write out file \"$outFile\": $!";	# buffered write errors surface at close
+
+unlink($tmpFile);
+
diff --git a/bin/tr2crfpp.pl b/bin/tr2crfpp.pl
index c9af840..a9e3541 100755
--- a/bin/tr2crfpp.pl
+++ b/bin/tr2crfpp.pl
@@ -1,5 +1,9 @@
 #!/usr/bin/env perl
 # -*- cperl -*-
+#
+# Matthias Bösinger (29.03.2016)
+# -> all changes marked with: MB1
+
 =head1 NAME
 
 tr2crfpp.pl
@@ -22,7 +26,13 @@ =head1 HISTORY
 require 5.0;
 use Getopt::Std;
 use strict 'vars';
+use utf8;
+use 5.010; 	# MB1
 use FindBin;
+
+binmode STDIN, ":encoding(UTF-8)"; 	# MB1
+binmode STDOUT, ":encoding(UTF-8)"; 	# MB1
+
 # use diagnostics;
 
 ### USER customizable section
@@ -93,21 +103,29 @@ sub License {
 if ($filename = shift) {
  NEWFILE:
   if (!(-e $filename)) { die "# $progname crash\t\tFile \"$filename\" doesn't exist"; }
-  open (*IF, $filename) || die "# $progname crash\t\tCan't open \"$filename\"";
-  $fh = "IF";
+  open (IF, "<:encoding(UTF-8)", $filename) || die "# $progname crash\t\tCan't open \"$filename\""; 	# set encoding to UTF-8 - MB1
+# open (*IF, $filename) || die "# $progname crash\t\tCan't open \"$filename\"";
+  $fh = "IF"; 	# keep the handle name so the read loop below works for files and for STDIN
 } else {
   $filename = "<STDIN>";
   $fh = "STDIN";
 }
 
-while (<$fh>) {
+# Read through $fh ("IF" or "STDIN"); IF is opened with a UTF-8 layer and STDIN gets one via binmode - MB1
+while (<$fh>) {
   if (/^\#/) { next; }			# skip comments
   elsif (/^\s+$/) { next; }		# skip blank lines
   else {
     my $tag = "";
     my @tokens = split(/ +/);
     my @feats = ();
-    my $hasPossibleEditor = (/(ed\.|editor|editors|eds\.)/) ? "possibleEditors" : "noEditors";
+    
+    ###
+    # Regex updated according to the changes in Tr2crfpp.pm by Artemy Kolchinsky and the new German editor strings in ConfigLang.pm
+    # MB1
+    ###
+    my $hasPossibleEditor = (/[^A-Za-z](ed\.?|editor|editors|eds\.?|Hrsg\.?|Herausgeber|Hg\.?|hgg\.?)/i) ? "possibleEditors" : "noEditors";
+    
     my $j = 0;
     for (my $i = 0; $i <= $#tokens; $i++) {
 #    for (my $i = $#tokens; $i >= 0; $i--) {
diff --git a/lib/ParsCit/ConfigLang.pm b/lib/ParsCit/ConfigLang.pm
new file mode 100644
index 0000000..18e8180
--- /dev/null
+++ b/lib/ParsCit/ConfigLang.pm
@@ -0,0 +1,63 @@
+package ParsCit::ConfigLang;
+
+################
+# Written By Matthias Bösinger (29.03.2016)
+#
+# Module used to set language specific data fields.
+# Calling 'Init' with a language code ("en" or "de") initializes the global data fields listed below.
+# hasEditorRegex: Regex used in feature determination to decide if a reference contains editor tokens.
+# authorSplitRegex: Regex used to split contiguous tokens labeled as author into several author names.
+# authorDeleteRegex: Regex used to delete parts of a token labeled as author.
+# inMarker: marker word that PostProcess.pm strips from the start of author/editor text.
+################
+
+use utf8;
+use strict;
+## Global (package variables read by Tr2crfpp.pm and PostProcess.pm)
+our $hasEditorRegex = '';
+our $authorSplitRegex = '';
+our $authorDeleteRegex = '';
+our $inMarker = '';
+
+## Language specific data
+my %enData = ( 	'editor' => '[^A-Za-z](ed\.?|editor|editors|eds\.?)',
+				'author' => '^(&|/|and|a\.)$',
+				'delete' => 'et\.? al\.?.*$',
+				'in' 	 => 'in' );
+
+my %deData = ( 	'editor' => '[^A-Za-z](Hrsg\.?|Herausgeber|Hg\.?|hgg\.?)',
+				'author' => '^(&|/|und|u\.)$',
+				'delete' => '(u\.a\..*|et\.? al\.?.*)$',
+				'in' 	 => 'in' );
+
+# Load the English data as default: an empty regex would silently re-use the last successful pattern.
+initData(%enData);
+
+## initialization methods
+sub Init {
+	my ($lang) = @_;
+	if ($lang eq "en") {
+		initData(%enData);
+	}
+	elsif ($lang eq "de") {
+		initData(%deData);
+	}
+	#additional languages might be included here - MB
+	else {
+		return 0;	# unknown language: the current settings stay untouched
+	}
+	
+	return 1;
+}
+
+sub initData {
+	my (%data) = @_;
+	
+	$hasEditorRegex = $data{'editor'};
+	$authorSplitRegex = $data{'author'};
+	$authorDeleteRegex = $data{'delete'};
+	$inMarker = $data{'in'};
+	
+}
+
+1;
diff --git a/lib/ParsCit/PostProcess.pm b/lib/ParsCit/PostProcess.pm
index ccd7e27..624d61c 100644
--- a/lib/ParsCit/PostProcess.pm
+++ b/lib/ParsCit/PostProcess.pm
@@ -5,6 +5,9 @@ package ParsCit::PostProcess;
 # representations.
 #
 # Isaac Councill, 07/20/07
+#
+# Matthias Bösinger (29.03.2016)
+# -> all changes marked with: MB1
 ###
 
 use utf8;
@@ -140,6 +143,17 @@ sub NormalizeFields
 				# Content is a reference to a list of author
 				$content = NormalizeAuthorNames($content);
 	    	} 
+	    	###
+	    	# The NormalizeAuthorNames procedure is also used for editor strings.
+	    	# Single editors are nested within an 'editors' tag, each placed in a single 'editor' tag
+	    	# MB1
+	    	###
+	    	elsif ($tag eq "editor") 
+	    	{
+				$tag = "editors";
+				# Content is a reference to a list of author
+				$content = NormalizeAuthorNames($content);
+			}
 			elsif ($tag eq "date") 
 			{
 				$content = NormalizeDate($content);
@@ -166,7 +180,21 @@ sub NormalizeFields
 	    
 			# Heuristic - only get first instance of tag.
 	    	# TODO: we can do better than that...
-	    	unless (defined $cite_info{ $tag }) { $cite_info{ $tag } = $content; }
+#	    	unless (defined $cite_info{ $tag }) { $cite_info{ $tag } = $content; }
+
+			###
+			# Authors and editors are appended to an already existing result list; for every other tag only the first instance is kept.
+			# MB1
+			###
+			if (defined $cite_info{ $tag }) {
+				if ($tag eq "authors" || $tag eq "editors") {
+					my $currentlist_ref = \@{$cite_info{ $tag }};
+					push @{$currentlist_ref}, @{$content};
+				}
+			}
+			else {
+				$cite_info{ $tag } = $content;
+			}	
 		}
 	
 		push @cite_infos, \%cite_info;
@@ -269,7 +297,13 @@ sub NormalizeAuthorNames
 
     foreach my $tok (@tokens) 
 	{
-		if ($tok =~ m/^(&|and)$/i) 
+		###
+		# language specific author split strings are loaded from ParsCit::ConfigLang
+		# MB1
+		###
+		my $author_split_regex = $ParsCit::ConfigLang::authorSplitRegex;
+
+		if ($tok =~ m/$author_split_regex/i) 
 		{
 	    	if ($#current_auth >= 0) 
 			{
@@ -323,7 +357,17 @@ sub RepairAndTokenizeAuthorText
     my ($author_text) = @_;
 
     # Repair obvious parse errors and weird notations.
-    $author_text =~ s/et\.? al\.?.*$//;
+	###
+	# Language specific author parts to delete are loaded from ParsCit::ConfigLang:
+	# 1) 'et al' ... and other words
+	# 2) 'in' ... as can be found at start of editor text (whole word only, so a name like "Ingram" stays intact)
+	# Empty patterns are skipped, because an empty regex would re-apply the last successful pattern. - MB1
+	###
+	my $to_delete_strings = $ParsCit::ConfigLang::authorDeleteRegex;
+    $author_text =~ s/$to_delete_strings// if length $to_delete_strings;
+    my $inpart = $ParsCit::ConfigLang::inMarker;
+    $author_text =~ s/^\s*(?:$inpart)(?!\p{L}):?\s*//i if length $inpart;
+    
     $author_text =~ s/^.*?[\p{IsUpper}\p{IsLower}][\p{IsUpper}\p{IsLower}]+\. //;
     $author_text =~ s/\(.*?\)//g;
     $author_text =~ s/^.*?\)\.?//g;
@@ -336,7 +380,9 @@ sub RepairAndTokenizeAuthorText
     $author_text =~ s/;/,/g;
     $author_text =~ s/,/, /g;
     $author_text =~ s/\:/ /g;
-    $author_text =~ s/[\:\"\<\>\/\?\{\}\[\]\+\=\(\)\*\^\%\$\#\@\!\~\_]//g;
+#   $author_text =~ s/[\:\"\<\>\/\?\{\}\[\]\+\=\(\)\*\^\%\$\#\@\!\~\_]//g;
+    $author_text =~ s/[\:\"\<\>\?\{\}\[\]\+\=\(\)\*\^\%\$\#\@\!\~\_]//g; 	#keep 'slash' since slash is often used as name separator - MB1
+    
     $author_text = JoinMultiWordNames($author_text);
 
     my @orig_tokens	= split '\s+', $author_text;
@@ -345,7 +391,8 @@ sub RepairAndTokenizeAuthorText
     for (my $i=0; $i <= $#orig_tokens; $i++) 
 	{
 		my $tok = $orig_tokens[$i];
-		if ($tok !~ m/[\p{IsUpper}\p{IsLower}&]/) 
+		
+		if ($tok !~ m/[\p{IsUpper}\p{IsLower}&\/]/) #keep 'slash' since slash is often used as name separator - MB1
 		{
 	    	if ($i < $#orig_tokens/2) 
 			{
@@ -445,7 +492,8 @@ sub NormalizeAuthorName2
 sub JoinMultiWordNames 
 {
     my $author_text = shift;
-    $author_text =~ s/\b((?:van|von|der|den|de|di|le|el))\s/$1_/sgi; # Thang 02 Mar 10: change \1 into \$1
+#   $author_text =~ s/\b((?:van|von|der|den|de|di|le|el))\s/$1_/sgi; # Thang 02 Mar 10: change \1 into \$1
+    $author_text =~ s/\b((?:van|von|der|den|de|di|le|el|zu|zur|vom|zum|und|dem|d'|del|da|degli|dalla|te|ter|of|v\.|d\.|z\.))\s/$1_/sgi; # added more multi word components; dots escaped so only the abbreviations 'v.', 'd.', 'z.' match - MB1
     return $author_text;
 
 }
diff --git a/lib/ParsCit/Tr2crfpp.pm b/lib/ParsCit/Tr2crfpp.pm
index ef8d40f..85934cd 100644
--- a/lib/ParsCit/Tr2crfpp.pm
+++ b/lib/ParsCit/Tr2crfpp.pm
@@ -7,6 +7,9 @@ package ParsCit::Tr2crfpp;
 #
 # Copyright 2005 \251 by Min-Yen Kan (not sure what this means for IGC edits, but
 # what the hell -IGC)
+#
+# Matthias Bösinger (29.03.2016)
+# -> all changes marked with: MB1
 ###
 
 use utf8;
@@ -17,6 +20,7 @@ use Encode ();
 
 use Omni::Config;
 use ParsCit::Config;
+use ParsCit::ConfigLang; 	#MB1
 
 ### USER customizable section
 my $tmp_dir		= $ParsCit::Config::tmpDir;
@@ -679,7 +683,7 @@ sub PrepDataUnmarkedToken
 # Prepare data for trfpp
 sub PrepData 
 {
-    my ($rcite_text, $filename) = @_;
+    my ($rcite_text, $filename, $split) = @_; 	# Additional parameter 'split' - MB1
 
 	# Generate a temporary file
     my $tmpfile = BuildTmpFile($filename);
@@ -700,6 +704,18 @@ sub PrepData
 		# Skip blank lines
 		if (/^\s*$/) { next; }
 
+		###
+		# Insert extra whitespace (only when the 'split' parameter is set):
+		# 1) after dots which are preceded by a letter and succeeded by a capital letter,
+		# 2) around slashes which are preceded by a letter, a semicolon or a dot and succeeded by a letter, a semicolon or a dot.
+		# This may occur in author strings. These author strings will not be processed correctly if no whitespace is inserted.
+		# MB1
+		###
+		if ($split) { 	# truth test: the flag may be undef or '' when the caller does not pass it
+			s/(\p{L})\.(\p{isUpper})/$1. $2/g; 	# Rule 1)
+			s/([\p{L}\.;]) ?\/ ?([\p{L}\.;])/$1 \/ $2/g; 	# Rule 2)
+		}
+
 		my $tag		= "";
 		my @tokens	= split(/ +/);
 		my @feats	= ();
@@ -710,7 +726,15 @@ sub PrepData
 		# I changed this string to match 'ed.', 'editor', 'editors', and 'eds.' if *not* 
 		# preceeded by an alphabetic character.
 		###
-		my $has_possible_editor = (/[^A-Za-z](ed\.|editor|editors|eds\.)/) ? "possibleEditors" : "noEditors";
+#		my $has_possible_editor = (/[^A-Za-z](ed\.|editor|editors|eds\.)/) ? "possibleEditors" : "noEditors";
+		
+		###
+		# language specific editor strings are loaded from ParsCit::ConfigLang
+		# And: regex set to case insensitive
+		# MB1
+		###
+		my $has_editor_regex = $ParsCit::ConfigLang::hasEditorRegex; 
+		my $has_possible_editor = (/$has_editor_regex/i) ? "possibleEditors" : "noEditors"; 
 
 		my $j = 0;
 		for (my $i = 0; $i <= $#tokens; $i++) 
@@ -958,7 +982,7 @@ sub PrepData
 						($word	=~ /^[0-9]{2-5}\([0-9]{2-5}\).?$/) ? "possibleVol" : "others";
 		    # 22 = punctuation
 			push(@{ $feats[ $j ] }, $punct);
-
+			
 		    # output tag
 		    push(@{ $feats[ $j ] }, $tag);