Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
# readr (development version)

* `locale()` gains a `date_order` argument to control the component order used
when parsing dates and date-times (e.g. `"mdy"`, `"dmy"`, `"ymd_hms"`). This
makes it possible to read year-last formats such as `10/02/2024` that the
automatic type guesser would otherwise treat as character.

* Date and date-time auto-detection now accepts any non-alphanumeric separator
between components (e.g. `2024.10.02`, `2024/10/02`), and falls back to a
year-last heuristic so unambiguous `D/M/YYYY` values are recognised as dates.

# readr 2.2.0

This release advances many deprecations.
Expand Down
45 changes: 43 additions & 2 deletions R/locale.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
#' @param asciify Should diacritics be stripped from date names and converted to
#' ASCII? This is useful if you're dealing with ASCII data where the correct
#' spellings have been lost. Requires the \pkg{stringi} package.
#' @param date_order Order of date components for auto-detection. One of
#' `"ymd"`, `"ydm"`, `"mdy"`, `"myd"`, `"dmy"`, `"dym"`, or one of `"ymd"`,
#' `"ydm"`, `"mdy"`, `"dmy"` combined with a time suffix: `"_hms"`, `"_hm"`,
#' or `"_h"` (e.g. `"mdy_hms"`). Use `NULL` (default) for automatic detection.
#' @export
#' @examples
#' locale()
Expand All @@ -47,7 +51,8 @@ locale <- function(
grouping_mark = ",",
tz = "UTC",
encoding = "UTF-8",
asciify = FALSE
asciify = FALSE,
date_order = NULL
) {
if (is.character(date_names)) {
date_names <- date_names_lang(date_names)
Expand Down Expand Up @@ -76,6 +81,38 @@ locale <- function(
tz <- check_tz(tz)
check_encoding(encoding)

if (!is.null(date_order)) {
check_string(date_order)
}

valid_date_orders <- c(
"ymd",
"ydm",
"mdy",
"myd",
"dmy",
"dym",
"ymd_hms",
"ymd_hm",
"ymd_h",
"mdy_hms",
"mdy_hm",
"mdy_h",
"dmy_hms",
"dmy_hm",
"dmy_h",
"ydm_hms",
"ydm_hm",
"ydm_h"
)
if (!is.null(date_order) && !date_order %in% valid_date_orders) {
stop(
"`date_order` must be NULL or one of: ",
paste(valid_date_orders, collapse = ", "),
call. = FALSE
)
}

structure(
list(
date_names = date_names,
Expand All @@ -84,7 +121,8 @@ locale <- function(
decimal_mark = decimal_mark,
grouping_mark = grouping_mark,
tz = tz,
encoding = encoding
encoding = encoding,
date_order = date_order
),
class = "locale"
)
Expand All @@ -107,6 +145,9 @@ print.locale <- function(x, ...) {
sep = ""
)
cat("Formats: ", x$date_format, " / ", x$time_format, "\n", sep = "")
if (!is.null(x$date_order)) {
cat("Date order: ", x$date_order, "\n", sep = "")
}
cat("Timezone: ", x$tz, "\n", sep = "")
cat("Encoding: ", x$encoding, "\n", sep = "")
print(x$date_names)
Expand Down
8 changes: 7 additions & 1 deletion man/locale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

44 changes: 37 additions & 7 deletions src/Collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,16 @@ CollectorPtr Collector::create(const cpp11::list& spec, LocaleInfo* pLocale) {
}
if (subclass == "collector_date") {
SEXP format_ = spec["format"];
std::string format = (Rf_isNull(format_)) != 0U
? pLocale->dateFormat_
: cpp11::as_cpp<std::string>(format_);
std::string format;
if ((Rf_isNull(format_)) == 0U) {
// Explicit format given by user
format = cpp11::as_cpp<std::string>(format_);
} else if (pLocale->dateOrder_.empty()) {
// No date_order set: use locale date format (e.g. "%AD" -> ISO8601)
format = pLocale->dateFormat_;
}
// When date_order is set and no explicit format: leave format empty
// so setValue() will dispatch through parseDateOrder()
return CollectorPtr(new CollectorDate(pLocale, format));
}
if (subclass == "collector_datetime") {
Expand Down Expand Up @@ -106,8 +113,23 @@ void CollectorDate::setValue(int i, const Token& t) {
std::string std_string(string.first, string.second);

parser_.setDate(std_string.c_str());
bool res =
(format_.empty()) ? parser_.parseLocaleDate() : parser_.parse(format_);
bool res;
if (!format_.empty()) {
res = parser_.parse(format_);
} else if (!pLocale_->dateOrder_.empty() &&
pLocale_->dateOrder_.find('_') == std::string::npos) {
// Explicit date-only order (e.g. "mdy", "dmy")
res = parser_.parseDateOrder(pLocale_->dateOrder_);
} else {
res = parser_.parseLocaleDate();
if (!res) {
// Auto-detection fallback: year-last heuristic (D/M/YYYY, M/D/YYYY).
// Mirrors isDate() in CollectorGuess.cpp so the guesser and the
// collector agree on which strings count as dates.
parser_.setDate(std_string.c_str());
res = parser_.parseYearLastHeuristic();
}
}

if (!res) {
warn(t.row(), t.col(), "date like " + format_, std_string);
Expand Down Expand Up @@ -141,8 +163,16 @@ void CollectorDateTime::setValue(int i, const Token& t) {
std::string std_string(string.first, string.second);

parser_.setDate(std_string.c_str());
bool res =
(format_.empty()) ? parser_.parseISO8601() : parser_.parse(format_);
bool res;
if (!format_.empty()) {
res = parser_.parse(format_);
} else if (!pLocale_->dateOrder_.empty() &&
pLocale_->dateOrder_.find('_') != std::string::npos) {
// Explicit datetime order (e.g. "mdy_hms", "dmy_hm")
res = parser_.parseDateOrder(pLocale_->dateOrder_);
} else {
res = parser_.parseISO8601();
}

if (!res) {
warn(t.row(), t.col(), "date like " + format_, std_string);
Expand Down
8 changes: 6 additions & 2 deletions src/Collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,14 @@ class CollectorCharacter : public Collector {
class CollectorDate : public Collector {
std::string format_;
DateTimeParser parser_;
LocaleInfo* pLocale_;

public:
CollectorDate(LocaleInfo* pLocale, const std::string& format)
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
format_(format),
parser_(pLocale) {}
parser_(pLocale),
pLocale_(pLocale) {}

void setValue(int i, const Token& t);

Expand All @@ -116,13 +118,15 @@ class CollectorDateTime : public Collector {
std::string format_;
DateTimeParser parser_;
std::string tz_;
LocaleInfo* pLocale_;

public:
CollectorDateTime(LocaleInfo* pLocale, const std::string& format)
: Collector(cpp11::writable::doubles(R_xlen_t(0))),
format_(format),
parser_(pLocale),
tz_(pLocale->tz_) {}
tz_(pLocale->tz_),
pLocale_(pLocale) {}

void setValue(int i, const Token& t);

Expand Down
37 changes: 29 additions & 8 deletions src/CollectorGuess.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,26 +96,47 @@ bool isTime(const std::string& x, LocaleInfo* pLocale) {

// Decide whether `x` should be guessed as a date (without a time component).
//
// - When the locale carries an order without '_' (e.g. "mdy", "dmy"), the
//   answer is whatever parseDateOrder() returns for that order.
// - When the locale carries an order with '_' (e.g. "mdy_hms"), the value is
//   never reported as a plain date.
// - With no order set, the locale date format is tried first and the
//   year-last heuristic second.
bool isDate(const std::string& x, LocaleInfo* pLocale) {
  DateTimeParser parser(pLocale);
  parser.setDate(x.c_str());

  const auto& order = pLocale->dateOrder_;
  if (!order.empty()) {
    // An order with a '_' suffix describes a date-time, so it must not match
    // as a date-only value.
    if (order.find('_') != std::string::npos) return false;
    return parser.parseDateOrder(order);
  }

  // Auto-detection: locale date format first (handles YMD/%AD).
  if (parser.parseLocaleDate()) return true;

  // Hand the input to the parser again before the second attempt
  // (presumably resets state left by the failed parse — TODO confirm).
  parser.setDate(x.c_str());
  return parser.parseYearLastHeuristic();
}

// Decide whether `x` should be guessed as a date-time.
//
// - When the locale carries an order with '_' (e.g. "mdy_hms", "dmy_hm"), the
//   value must parse with parseDateOrder() and the resulting date-time must
//   report validDateTime().
// - When the locale carries a date-only order (no '_'), the value is never
//   reported as a date-time.
// - With no order set, only ISO8601 input is accepted; compact dates are
//   additionally required to have a year above 999.
static bool isDateTime(const std::string& x, LocaleInfo* pLocale) {
  DateTimeParser parser(pLocale);
  parser.setDate(x.c_str());

  const auto& order = pLocale->dateOrder_;
  if (!order.empty()) {
    // A date-only order was requested explicitly: don't match as datetime.
    if (order.find('_') == std::string::npos) return false;

    if (!parser.parseDateOrder(order)) return false;
    return parser.makeDateTime().validDateTime();
  }

  // Auto-detection: ISO8601 only.
  if (!parser.parseISO8601()) return false;
  if (!parser.compactDate()) return true;

  // Values like 00014567 are unlikely to be dates, so don't guess
  return parser.year() > 999;
}

Expand Down
Loading
Loading