Sans Pareil Technologies, Inc.

Key To Your Business

Date Parsing


Parsing ISO 8601 formatted date strings is a very common requirement for web services. The following is a simple parser that converts a few commonly used formats into a std::chrono::time_point instance, or into the equivalent value expressed as microseconds since the UNIX epoch.

Formats


The ISO 8601 standard is fairly broad, and the intention is not to support every variant. Most services tend to use the `YYYY-MM-dd'T'HH:mm:ss.SSSX` format, and this is the format that our parser supports. In particular, all of the following inputs should be parsed successfully.
2021-02-11
2021-02-11T11:17:43Z
2021-02-11T11:17:43-0600
2021-02-11T11:17:43+05:30
2021-02-11T11:17:43.123Z
2021-02-11T11:17:43.123-0600
2021-02-11T11:17:43.123+05:30
2021-02-11T11:17:43.123456Z
2021-02-11T11:17:43.123456-0600
2021-02-11T11:17:43.123456+05:30

Implementation


The following simple implementation has been used in many of the service projects I have developed.
Interface 
#pragma once

#include <chrono>
#include <string_view>
#include <variant>

/// Date parsing and formatting helpers for ISO 8601 style timestamps.
namespace spt::util
{
  /// A system clock time point whose duration is expressed in microseconds.
  using DateTime = std::chrono::time_point<std::chrono::system_clock, std::chrono::microseconds>;

  /// Parse `date` via parseISO8601 and return the result as a count of
  /// microseconds since the UNIX epoch.  Returns `0` when parsing fails.
  int64_t microSeconds( std::string_view date );

  /// Parse an ISO 8601 date or date-time string.
  /// @return The parsed DateTime on success, otherwise a string describing the failure.
  std::variant<DateTime, std::string> parseISO8601( const std::string_view date );

  /// Format `epoch` (microseconds since the UNIX epoch) as
  /// `YYYY-MM-DDTHH:mm:ss.SSSSSSZ` (six fractional digits).
  std::string isoDateMicros( int64_t epoch );

  /// Format `epoch` (microseconds since the UNIX epoch) as
  /// `YYYY-MM-DDTHH:mm:ss.SSSZ` (three fractional digits).
  std::string isoDateMillis( int64_t epoch );
}
Implementation 
#include "date.h"
#include "log/NanoLog.h"

#include <sstream>

namespace spt::util
{
  namespace pdate
  {
    /// Gregorian leap year rule: every 4th year is a leap year, except
    /// century years, which are leap years only when divisible by 400.
    bool isLeapYear( int16_t year )
    {
      if ( year % 400 == 0 ) return true;
      if ( year % 100 == 0 ) return false;
      return year % 4 == 0;
    }
  }

  /// Convenience wrapper around parseISO8601.
  /// @param date The ISO 8601 text to parse.
  /// @return Microseconds since the UNIX epoch, or `0` if `date` could not be parsed.
  int64_t microSeconds( std::string_view date )
  {
    const auto parsed = parseISO8601( date );
    const bool failed = std::holds_alternative<std::string>( parsed );
    // The error text is discarded; callers only see the 0 sentinel.
    return failed ? int64_t( 0 ) : std::get<DateTime>( parsed ).time_since_epoch().count();
  }

  std::variant<DateTime, std::string> parseISO8601( std::string_view date )
  {
    // 2021-02-11
    // 2021-02-11T11:17:43Z
    // 2021-02-11T11:17:43-0600
    // 2021-02-11T11:17:43+05:30
    // 2021-02-11T11:17:43.123Z
    // 2021-02-11T11:17:43.123-0600
    // 2021-02-11T11:17:43.123+05:30
    // 2021-02-11T11:17:43.123456Z
    // 2021-02-11T11:17:43.123456-0600
    // 2021-02-11T11:17:43.123456+05:30
    if ( date.size() < 10 )
    {
      LOG_WARN << "Invalid date-time: " << date;
      return "Invalid date format";
    }
    if ( date.size() > 10 && date.size() < 20 )
    {
      LOG_WARN << "Invalid date-time: " << date;
      return "Invalid datetime format";
    }
    if ( date.size() > 10 && date[10] != 'T' )
    {
      LOG_WARN << "Invalid date-time: " << date;
      return "Invalid datetime separator";
    }

    constexpr auto microSecondsPerHour = int64_t( 3600000000 );

    char *end;
    const int16_t year = std::strtol( date.substr( 0, 4 ).data(), &end, 10 );
    const int16_t month = std::strtol( date.substr( 5, 2 ).data(), &end, 10 );
    const int16_t day = std::strtol( date.substr( 8, 2 ).data(), &end, 10 );
    const int16_t hour = ( date.size() >= 13 && date[10] == 'T' ) ?
        std::strtol( date.substr( 11, 2 ).data(), &end, 10 ) : 0;
    const int16_t minute = ( date.size() >= 16 && date[10] == 'T' && date[13] == ':' ) ?
        std::strtol( date.substr( 14, 2 ).data(), &end, 10 ) : 0;
    const int16_t second = ( date.size() >= 19 && date[10] == 'T' && date[13] == ':' && date[16] == ':') ?
        std::strtol( date.substr( 17, 2 ).data(), &end, 10 ) : 0;

    const auto parseMillis = [&date]() -> std::variant<int16_t, std::string>
    {
      if ( date[19] != '.' ) return int16_t( 0 );
      if ( date.size() < 22 )
      {
        LOG_WARN << "Invalid date-time: " << date;
        return "Invalid datetime millis";
      }
      char *end;
      return int16_t( std::strtol( std::string{ date.substr( 20, 3 ) }.data(), &end, 10 ) );
    };

    const auto parseMicros = [&date]() -> int16_t
    {
      if ( date[19] != '.' ) return int16_t( 0 );
      if ( date.size() < 25 ) return int16_t( 0 );

      switch ( date[23] )
      {
      case '+':
      case '-':
      case 'Z':
        return int16_t( 0 );
      }
      char *end;
      return int16_t( std::strtol( date.substr( 23, 3 ).data(), &end, 10 ) );
    };

    using Tuple = std::tuple<int16_t, int16_t>;
    const auto parseZone = [&date]() -> std::variant<Tuple, std::string>
    {
      if ( date.size() < 20 ) return Tuple{ int16_t( 0 ), int16_t( 0 ) };

      const auto c19 = date[19];
      switch ( c19 )
      {
      case 'Z':
        return Tuple{ int16_t( 0 ), int16_t( 0 ) };
      case '+':
      case '-':
      {
        if ( date.size() < 24 )
        {
          LOG_WARN << "Invalid date-time: " << date;
          return "Invalid datetime zone";
        }
        const int16_t mult = date[19] == '+' ? 1 : -1;
        char *end;
        if ( date.size() == 24 )
        {
          const std::string hh{ date.substr( 20, 2 ) };
          const std::string hm{ date.substr( 22, 2 ) };
          const int16_t h = std::strtol( hh.data(), &end, 10 );
          const int16_t s = std::strtol( hm.data(), &end, 10 );
          return Tuple{ h * mult, s };
        }
        if ( date[22] != ':' )
        {
          LOG_WARN << "Invalid date-time: " << date << " at 22 " << date[22] << " for size " << int( date.size() );
          return "Invalid datetime zone";
        }
        const int16_t h = std::strtol( date.substr( 20, 2 ).data(), &end, 10 );
        const int16_t s = std::strtol( date.substr( 23, 2 ).data(), &end, 10 );
        return Tuple{ h * mult, s };
      }
      case '.':
        if ( date.size() < 24 )
        {
          LOG_WARN << "Invalid date-time: " << date;
          return "Invalid datetime fraction";
        }
        const auto c23 = date[23];
        switch ( c23 )
        {
        case 'Z':
          return Tuple{ int16_t( 0 ), int16_t( 0 ) };
        case '+':
        case '-':
          if ( date.size() < 28 )
          {
            LOG_WARN << "Invalid date-time: " << date;
            return "Invalid datetime zone";
          }
          const int16_t mult = date[23] == '+' ? 1 : -1;
          char *end;
          if ( date.size() == 28 )
          {
            const std::string hh{ date.substr( 24, 2 ) };
            const std::string hm{ date.substr( 26, 2 ) };
            const int16_t h = std::strtol( hh.data(), &end, 10 );
            const int16_t s = std::strtol( hm.data(), &end, 10 );
            return Tuple{ h * mult, s };
          }
          if ( date[26] != ':' )
          {
            LOG_WARN << "Invalid date-time: " << date << " at 26 " << date[26] << " for size " << int( date.size() );
            return "Invalid datetime zone";
          }
          const int16_t h = std::strtol( date.substr( 24, 2 ).data(), &end, 10 );
          const int16_t s = std::strtol( date.substr( 27, 2 ).data(), &end, 10 );
          return Tuple{ h * mult, s };
        }

        if ( date.size() < 27 )
        {
          LOG_WARN << "Invalid date-time: " << date;
          return "Invalid datetime zone";
        }
        const auto c26 = date[26];
        switch ( c26 )
        {
        case 'Z':
          return Tuple{ int16_t( 0 ), int16_t( 0 ) };
        case '+':
        case '-':
          if ( date.size() < 31 )
          {
            LOG_WARN << "Invalid date-time: " << date;
            return "Invalid datetime zone";
          }
          const int16_t mult = date[26] == '+' ? 1 : -1;
          char *end;
          if ( date.size() == 31 )
          {
            const std::string hh{ date.substr( 27, 2 ) };
            const std::string hm{ date.substr( 29, 2 ) };
            const int16_t h = std::strtol( hh.data(), &end, 10 );
            const int16_t s = std::strtol( hm.data(), &end, 10 );
            return Tuple{ h * mult, s };
          }
          if ( date[29] != ':' )
          {
            LOG_WARN << "Invalid date-time: " << date << " at 29 " << date[29] << " for size " << int( date.size() );
            return "Invalid datetime zone";
          }
          const int16_t h = std::strtol( date.substr( 27, 2 ).data(), &end, 10 );
          const int16_t s = std::strtol( date.substr( 30, 2 ).data(), &end, 10 );
          return Tuple{ h * mult, s };
        }
      }

      LOG_WARN << "Invalid date-time: " << date;
      return "Invalid datetime format zone";
    };


    const auto pm = parseMillis();
    if ( std::holds_alternative<std::string>( pm ) ) return std::get<std::string>( pm );

    const int16_t millis = std::get<int16_t>( pm );
    const int16_t micros = parseMicros();

    const auto z = parseZone();
    if ( std::holds_alternative<std::string>( z ) ) return std::get<std::string>( z );

    const auto [dsth, dstm] = std::get<Tuple>( z );
    if ( dsth > 23 || dstm > 59 )
    {
      LOG_WARN << "Invalid date-time: " << date;
      return "Invalid datetime zone";
    }

    int64_t epoch = micros;
    epoch += millis * int64_t( 1000 );
    epoch += second * int64_t( 1000000 );
    epoch += minute * int64_t( 60000000 );
    epoch += hour * microSecondsPerHour;
    epoch += ( day - 1 ) * 24 * microSecondsPerHour;

    const int8_t isLeap = pdate::isLeapYear( year );

    for ( int i = 1; i < month; ++i )
    {
      switch ( i )
      {
      case 2:
        epoch += ( (isLeap) ? 29 : 28 ) * 24 * microSecondsPerHour;
        break;
      case 4:
      case 6:
      case 9:
      case 11:
        epoch += 30 * 24 * microSecondsPerHour;
        break;
      default:
        epoch += 31 * 24 * microSecondsPerHour;
      }
    }

    for ( int i = 1970; i < year; ++i )
    {
      if ( pdate::isLeapYear( i ) ) epoch += 366 * 24 * microSecondsPerHour;
      else epoch += 365 * 24 * microSecondsPerHour;
    }

    if ( dsth != 0 ) epoch += -1 * dsth * microSecondsPerHour;
    if ( dstm != 0 ) epoch += dstm * int64_t( 60000000 );
    return DateTime{ std::chrono::microseconds{ epoch } };
  }

  /**
   * Format an instant as an ISO 8601 UTC timestamp with microsecond precision.
   *
   * @param epoch Microseconds since the UNIX epoch.  Negative values (instants
   *   before 1970-01-01T00:00:00Z) are supported.
   * @return Text of the form `YYYY-MM-DDTHH:mm:ss.SSSSSSZ`.
   */
  std::string isoDateMicros( int64_t epoch )
  {
    constexpr int64_t microsPerDay = int64_t( 86400 ) * 1000000;

    // Floor division: a negative epoch borrows from the previous day so that
    // the time-of-day remainder is never negative.
    int64_t days = epoch / microsPerDay;
    int64_t rest = epoch % microsPerDay;
    if ( rest < 0 )
    {
      rest += microsPerDay;
      --days;
    }

    const int64_t fraction = rest % 1000000;
    rest /= 1000000;
    const int64_t second = rest % 60;
    rest /= 60;
    const int64_t minute = rest % 60;
    const int64_t hour = rest / 60;

    // Civil date from the day count (proleptic Gregorian calendar).  Years are
    // treated as starting in March so the leap day is the last day of the year.
    const int64_t shifted = days + 719468;
    const int64_t era = ( shifted >= 0 ? shifted : shifted - 146096 ) / 146097;
    const int64_t dayOfEra = shifted - era * 146097;
    const int64_t yearOfEra = ( dayOfEra - dayOfEra / 1460 + dayOfEra / 36524 - dayOfEra / 146096 ) / 365;
    const int64_t dayOfYear = dayOfEra - ( 365 * yearOfEra + yearOfEra / 4 - yearOfEra / 100 );
    const int64_t marchMonth = ( 5 * dayOfYear + 2 ) / 153;
    const int64_t day = dayOfYear - ( 153 * marchMonth + 2 ) / 5 + 1;
    const int64_t month = marchMonth < 10 ? marchMonth + 3 : marchMonth - 9;
    const int64_t year = yearOfEra + era * 400 + ( month <= 2 ? 1 : 0 );

    std::stringstream ss;

    // Writes `value` left-padded with zeros to at least `width` digits.
    const auto put = [&ss]( int64_t value, int width )
    {
      int64_t threshold = 1;
      for ( int i = 1; i < width; ++i ) threshold *= 10;
      for ( ; value >= 0 && threshold > 1 && value < threshold; threshold /= 10 ) ss << '0';
      ss << value;
    };

    put( year, 4 );
    ss << '-';
    put( month, 2 );
    ss << '-';
    put( day, 2 );
    ss << 'T';
    put( hour, 2 );
    ss << ':';
    put( minute, 2 );
    ss << ':';
    put( second, 2 );
    ss << '.';
    put( fraction, 6 );
    ss << 'Z';

    return ss.str();
  }

  /**
   * Format an instant as an ISO 8601 UTC timestamp with millisecond precision.
   * Sub-millisecond digits are discarded (rounded towards the past).
   *
   * @param epoch Microseconds since the UNIX epoch.  Negative values (instants
   *   before 1970-01-01T00:00:00Z) are supported.
   * @return Text of the form `YYYY-MM-DDTHH:mm:ss.SSSZ`.
   */
  std::string isoDateMillis( int64_t epoch )
  {
    constexpr int64_t microsPerDay = int64_t( 86400 ) * 1000000;

    // Floor division: a negative epoch borrows from the previous day so that
    // the time-of-day remainder is never negative.
    int64_t days = epoch / microsPerDay;
    int64_t rest = epoch % microsPerDay;
    if ( rest < 0 )
    {
      rest += microsPerDay;
      --days;
    }

    rest /= 1000;  // drop the microsecond digits
    const int64_t millis = rest % 1000;
    rest /= 1000;
    const int64_t second = rest % 60;
    rest /= 60;
    const int64_t minute = rest % 60;
    const int64_t hour = rest / 60;

    // Civil date from the day count (proleptic Gregorian calendar).  Years are
    // treated as starting in March so the leap day is the last day of the year.
    const int64_t shifted = days + 719468;
    const int64_t era = ( shifted >= 0 ? shifted : shifted - 146096 ) / 146097;
    const int64_t dayOfEra = shifted - era * 146097;
    const int64_t yearOfEra = ( dayOfEra - dayOfEra / 1460 + dayOfEra / 36524 - dayOfEra / 146096 ) / 365;
    const int64_t dayOfYear = dayOfEra - ( 365 * yearOfEra + yearOfEra / 4 - yearOfEra / 100 );
    const int64_t marchMonth = ( 5 * dayOfYear + 2 ) / 153;
    const int64_t day = dayOfYear - ( 153 * marchMonth + 2 ) / 5 + 1;
    const int64_t month = marchMonth < 10 ? marchMonth + 3 : marchMonth - 9;
    const int64_t year = yearOfEra + era * 400 + ( month <= 2 ? 1 : 0 );

    std::stringstream ss;

    // Writes `value` left-padded with zeros to at least `width` digits.
    const auto put = [&ss]( int64_t value, int width )
    {
      int64_t threshold = 1;
      for ( int i = 1; i < width; ++i ) threshold *= 10;
      for ( ; value >= 0 && threshold > 1 && value < threshold; threshold /= 10 ) ss << '0';
      ss << value;
    };

    put( year, 4 );
    ss << '-';
    put( month, 2 );
    ss << '-';
    put( day, 2 );
    ss << 'T';
    put( hour, 2 );
    ss << ':';
    put( minute, 2 );
    ss << ':';
    put( second, 2 );
    ss << '.';
    put( millis, 3 );
    ss << 'Z';

    return ss.str();
  }
}