Skip to content

Commit f4b0e70

Browse files
aitelintaitelint
andauthored
[EN DateTime V2] Added support for cases like "April ninth through 15th" (#2905) (#2994)
* Added support for cases like "April ninth through 15th" (#2905)
* Modified fix to use regexes instead of OrdinalExtractor according to review
* Removed DateContext modifications
* Corrected bug in Hindi Ordinal extraction

Co-authored-by: aitelint <Fabrizio.Sorba@telusinternational.com>
1 parent 73e8247 commit f4b0e70

9 files changed

Lines changed: 387 additions & 32 deletions

File tree

.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,10 @@ public static class DateTimeDefinitions
4646
public const string WrittenElevenToNineteenRegex = @"(?:eleven|twelve|(?:thir|four|fif|six|seven|eigh|nine)teen)";
4747
public const string WrittenTensRegex = @"(?:ten|twenty|thirty|fou?rty|fifty|sixty|seventy|eighty|ninety)";
4848
public static readonly string WrittenNumRegex = $@"(?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)";
49+
public const string WrittenOneToNineOrdinalRegex = @"(?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)";
50+
public const string WrittenTensOrdinalRegex = @"(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)";
51+
public static readonly string WrittenOrdinalRegex = $@"(?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})";
52+
public static readonly string WrittenOrdinalDayRegex = $@"\b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b";
4953
public static readonly string WrittenCenturyFullYearRegex = $@"(?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)";
5054
public const string WrittenCenturyOrdinalYearRegex = @"(?:twenty(\s+(one|two))?|ten|eleven|twelve|thirteen|fifteen|eighteen|(?:four|six|seven|nine)(teen)?|one|two|three|five|eight)";
5155
public static readonly string CenturyRegex = $@"\b(?<century>{WrittenCenturyFullYearRegex}|{WrittenCenturyOrdinalYearRegex}(\s+hundred)?)\b";
@@ -78,10 +82,10 @@ public static class DateTimeDefinitions
7882
public const string ToTokenRegex = @"\b(to)$";
7983
public const string FromRegex = @"\b(from(\s+the)?)$";
8084
public const string BetweenTokenRegex = @"\b(between(\s+the)?)$";
81-
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
82-
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
83-
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b";
84-
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
85+
public static readonly string SimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b";
86+
public static readonly string MonthFrontSimpleCasesRegex = $@"\b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
87+
public static readonly string MonthFrontBetweenRegex = $@"\b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b";
88+
public static readonly string BetweenRegex = $@"\b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b";
8589
public static readonly string MonthWithYear = $@"\b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b";
8690
public const string SpecialYearPrefixes = @"(calendar|(?<special>fiscal|school))";
8791
public static readonly string OneWordPeriodRegex = $@"\b((((the\s+)?month of\s+)?({StrictRelativeRegex}\s+)?{MonthRegex})|(month|year) to date|(?<toDate>((un)?till?|to)\s+date)|({RelativeRegex}\s+)?(my\s+)?((?<business>working\s+week|workweek)|week(end)?|month|fortnight|(({SpecialYearPrefixes}\s+)?year))(?!((\s+of)?\s+\d+(?!({BaseDateTime.BaseAmDescRegex}|{BaseDateTime.BasePmDescRegex}))|\s+to\s+date))(\s+{AfterNextSuffixRegex})?)\b";

.NET/Microsoft.Recognizers.Definitions.Common/Hindi/NumbersDefinitions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ public static class NumbersDefinitions
7070
public const string DecimalUnitsRegex = @"(?:डेढ़|डेढ़|डेढ|ढाई|सवा|सावा)";
7171
public static readonly string DecimalUnitsWithRoundNumberRegex = $@"({DecimalUnitsRegex}\s+({{AllNumericalIntRegex}}\s+)?{RoundNumberIntegerRegex}|{DecimalUnitsRegex})";
7272
public const string RoundNumberOrdinalRegex = @"(?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))";
73-
public const string OneToNineOrdinalRegex = @"(?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
73+
public const string OneToNineOrdinalRegex = @"(?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))";
7474
public const string TenToNineteenOrdinalRegex = @"(?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))";
7575
public const string TwentyToTwentyNineOrdinalRegex = @"(?:(बीस|इक्कीस|बाईस|बाइस|तेईस|तेइस|चौबीस|पच्चीस|छब्बीस|सत्ताईस|सत्ताइस|अट्ठाईस|अट्ठाइस|उनतीस)(वां|वीं|वें|वाँ))";
7676
public const string ThirtyToThirtyNineOrdinalRegex = @"(?:(तीस|इकतीस|इकत्तीस|बत्तीस|तैंतीस|चौंतीस|पैंतीस|छ्त्तीस|सैंतीस|अड़तीस|उनतालीस)(वां|वीं|वें|वाँ))";

.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ public static class Constants
2525
// SourceEntity Types
2626
public const string SYS_DATETIME_DATETIMEPOINT = "datetimepoint";
2727

28+
// Number Types
29+
public const string SYS_NUMBER_ORDINAL = "builtin.num.ordinal";
30+
2831
// Model Name
2932
public const string MODEL_DATETIME = "datetime";
3033

@@ -113,6 +116,7 @@ public static class Constants
113116
public const int MaxWeekOfMonth = 5;
114117
public const int MaxMonth = 12;
115118
public const int MinMonth = 1;
119+
public const int MaxDayMonth = 31;
116120

117121
// Day start hour
118122
public const int DayHourStart = 0;
@@ -242,6 +246,7 @@ public static class Constants
242246
public const string EndGroupName = "end";
243247
public const string WithinGroupName = "within";
244248
public const string NumberGroupName = "number";
249+
public const string OrdinalGroupName = "ordinal";
245250
public const string OrderGroupName = "order";
246251
public const string AgoGroupName = "ago";
247252
public const string YesterdayGroupName = "yesterday";

.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseDatePeriodExtractor.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,10 @@ private List<ExtractResult> ExtractImpl(string text, DateObject reference)
348348

349349
tokens.AddRange(MergeTwoTimePoints(text, reference));
350350
tokens.AddRange(MatchDuration(text, reference));
351-
tokens.AddRange(SingleTimePointWithPatterns(text, new List<ExtractResult>(ordinalExtractions), reference));
351+
tokens.AddRange(SingleTimePointWithPatterns(text, ordinalExtractions, reference));
352352
tokens.AddRange(MatchComplexCases(text, simpleCasesResults, reference));
353353
tokens.AddRange(MatchYearPeriod(text, reference));
354-
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, new List<ExtractResult>(ordinalExtractions)));
354+
tokens.AddRange(MatchOrdinalNumberWithCenturySuffix(text, ordinalExtractions));
355355

356356
return Token.MergeAllTokens(tokens, text, ExtractorName);
357357
}

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseDatePeriodParser.cs

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -699,9 +699,47 @@ private DateTimeResolutionResult ParseSimpleCases(string text, DateObject refere
699699

700700
if (match.Success)
701701
{
702-
var days = match.Groups["day"];
703-
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
704-
endDay = this.config.DayOfMonth[days.Captures[1].Value];
702+
var days = match.Groups[Constants.DayGroupName];
703+
var writtenDay = match.Groups[Constants.OrdinalGroupName];
704+
if (writtenDay.Captures.Count > 0 && days.Captures[0].Value == writtenDay.Captures[0].Value)
705+
{
706+
// Parse beginDay in written form
707+
var dayMatch = writtenDay.Captures[0];
708+
var dayEr = new ExtractResult
709+
{
710+
Start = dayMatch.Index,
711+
Length = dayMatch.Length,
712+
Text = dayMatch.Value,
713+
Type = Constants.SYS_NUMBER_ORDINAL,
714+
Metadata = new Metadata { IsOrdinalRelative = false, },
715+
};
716+
var dayPr = this.config.NumberParser.Parse(dayEr);
717+
beginDay = (int)(double)dayPr.Value;
718+
}
719+
else
720+
{
721+
beginDay = this.config.DayOfMonth[days.Captures[0].Value];
722+
}
723+
724+
if (writtenDay.Captures.Count > 0 && days.Captures[1].Value == writtenDay.Captures[writtenDay.Captures.Count - 1].Value)
725+
{
726+
// Parse endDay in written form
727+
var dayMatch = writtenDay.Captures[writtenDay.Captures.Count - 1];
728+
var dayEr = new ExtractResult
729+
{
730+
Start = dayMatch.Index,
731+
Length = dayMatch.Length,
732+
Text = dayMatch.Value,
733+
Type = Constants.SYS_NUMBER_ORDINAL,
734+
Metadata = new Metadata { IsOrdinalRelative = false, },
735+
};
736+
var dayPr = this.config.NumberParser.Parse(dayEr);
737+
endDay = (int)(double)dayPr.Value;
738+
}
739+
else
740+
{
741+
endDay = this.config.DayOfMonth[days.Captures[1].Value];
742+
}
705743

706744
// parse year
707745
year = config.DateExtractor.GetYearFromText(match.Match);

Patterns/English/English-DateTime.yaml

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,16 @@ WrittenTensRegex: !simpleRegex
6060
WrittenNumRegex: !nestedRegex
6161
def: (?:{WrittenOneToNineRegex}|{WrittenElevenToNineteenRegex}|{WrittenTensRegex}(\s+{WrittenOneToNineRegex})?)
6262
references: [ WrittenOneToNineRegex, WrittenElevenToNineteenRegex, WrittenTensRegex ]
63+
WrittenOneToNineOrdinalRegex: !simpleRegex
64+
def: (?:first|second|third|fourth|fifth|sixth|seventh|eighth|nine?th)
65+
WrittenTensOrdinalRegex: !simpleRegex
66+
def: (?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth|fortieth|fiftieth|sixtieth|seventieth|eightieth|ninetieth)
67+
WrittenOrdinalRegex: !nestedRegex
68+
def: (?:{WrittenOneToNineOrdinalRegex}|{WrittenTensOrdinalRegex}|{WrittenTensRegex}\s+{WrittenOneToNineOrdinalRegex})
69+
references: [ WrittenOneToNineOrdinalRegex, WrittenTensOrdinalRegex, WrittenTensRegex ]
70+
WrittenOrdinalDayRegex: !nestedRegex
71+
def: \b(the\s+)?(?<day>(?<ordinal>{WrittenOneToNineOrdinalRegex}|(?:tenth|eleventh|twelfth|thirteenth|fourteenth|fifteenth|sixteenth|seventeenth|eighteenth|nineteenth|twentieth|thirtieth)|(?:ten|twenty)\s+{WrittenOneToNineOrdinalRegex}|thirty\s+first))\b
72+
references: [ WrittenOneToNineOrdinalRegex ]
6373
WrittenCenturyFullYearRegex: !nestedRegex
6474
def: (?:(one|two)\s+thousand((\s+and)?\s+{WrittenOneToNineRegex}\s+hundred)?)
6575
references: [ WrittenOneToNineRegex]
@@ -137,17 +147,17 @@ FromRegex: !simpleRegex
137147
BetweenTokenRegex: !simpleRegex
138148
def: \b(between(\s+the)?)$
139149
SimpleCasesRegex: !nestedRegex
140-
def: \b({RangePrefixRegex}\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex}\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+{DayRegex})((\s+|\s*,\s*){YearRegex})?\b
141-
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex ]
150+
def: \b({RangePrefixRegex}\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*(({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}|{MonthSuffixRegex}\s+({DayRegex}|{WrittenOrdinalDayRegex}))((\s+|\s*,\s*){YearRegex})?\b
151+
references: [ DayRegex, TillRegex, MonthSuffixRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
142152
MonthFrontSimpleCasesRegex: !nestedRegex
143-
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex})\s*{TillRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
144-
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex ]
153+
def: \b({RangePrefixRegex}\s+)?{MonthSuffixRegex}\s+((from)\s+)?({DayRegex}|{WrittenOrdinalDayRegex})\s*{TillRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
154+
references: [ MonthSuffixRegex, DayRegex, TillRegex, YearRegex, RangePrefixRegex, WrittenOrdinalDayRegex ]
145155
MonthFrontBetweenRegex: !nestedRegex
146-
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})((\s+|\s*,\s*){YearRegex})?\b
147-
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex ]
156+
def: \b{MonthSuffixRegex}\s+(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})((\s+|\s*,\s*){YearRegex})?\b
157+
references: [ MonthSuffixRegex, DayRegex, RangeConnectorRegex , YearRegex, WrittenOrdinalDayRegex ]
148158
BetweenRegex: !nestedRegex
149-
def: \b(between\s+)({DayRegex})\s*{RangeConnectorRegex}\s*({DayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
150-
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex ]
159+
def: \b(between\s+)({DayRegex}|{WrittenOrdinalDayRegex})\s*{RangeConnectorRegex}\s*({DayRegex}|{WrittenOrdinalDayRegex})\s+{MonthSuffixRegex}((\s+|\s*,\s*){YearRegex})?\b
160+
references: [ DayRegex, RangeConnectorRegex , MonthSuffixRegex, YearRegex, WrittenOrdinalDayRegex ]
151161
MonthWithYear: !nestedRegex
152162
def: \b((({WrittenMonthRegex}[\.]?|((the\s+)?(?<cardinal>first|1st|second|2nd|third|3rd|fourth|4th|fifth|5th|sixth|6th|seventh|7th|eighth|8th|ninth|9th|tenth|10th|eleventh|11th|twelfth|12th|last)\s+month(?=\s+(of|in))))((\s*)[/\\\-\.,]?(\s+(of|in))?(\s*)({YearRegex}|(?<order>following|next|last|this)\s+year)|\s+(of|in)\s+{TwoDigitYearRegex}))|(({YearRegex}|(?<order>following|next|last|this)\s+year)(\s*),?(\s*){WrittenMonthRegex}))\b
153163
references: [ WrittenMonthRegex, YearRegex, TwoDigitYearRegex ]

Patterns/Hindi/Hindi-Numbers.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ DecimalUnitsWithRoundNumberRegex: !nestedRegex
125125
RoundNumberOrdinalRegex: !simpleRegex
126126
def: (?:(सौ|हजार|हज़ार|लाख|करोड़|अरब|खरब)(वां|वीं|वें|वाँ))
127127
OneToNineOrdinalRegex: !simpleRegex
128-
def: (?:पहला|पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
128+
def: (?:पहला|(?<!से\s*)पहले|पहली|तीसरे|प्रथम|दूसरा|दूसरी|दूसरे|तिहाई|चौथाई|((पांच|पाँच|छठ|सात|आठ|नौ)(वां|वीं|वें|वाँ|वा)))
129129
TenToNineteenOrdinalRegex: !simpleRegex
130130
def: (?:(दस|ग्यारह|बारह|तेरह|चौदह|पंद्रह|सोलह|सत्रह|अठारह|उन्नीस)(वां|वीं|वें|वाँ))
131131
TwentyToTwentyNineOrdinalRegex: !simpleRegex

0 commit comments

Comments
 (0)