egmkang 服务端开发工程师

format

2014-09-10

在我的green_turtle库里面, 有实现一个logger类, 用来打印日志.

最开始的API设计成这个样子:

  // Emits a debug log message built printf-style: `pattern` is the format
  // string and the variadic arguments supply its values.
  // The format attribute makes GCC/Clang type-check the arguments against
  // `pattern`. The indices (2, 3) are one higher than the visible parameter
  // positions, which is how a non-static member function is annotated (the
  // implicit `this` counts as argument 1) -- presumably this is a member of
  // the logger class; confirm against the class definition.
  void Debug(const char *pattern, ...)
  __attribute__((__format__(__printf__, 2, 3)));

  // va_list counterpart of Debug: takes the format string `pattern` and the
  // already-captured argument list `ap`.
  // NOTE(review): how `prefix` is combined with the formatted message is not
  // visible here -- confirm against the implementation.
  // In the format attribute, 3 is the position of the format string and the
  // trailing 0 tells the compiler there are no variadic arguments to check
  // (they arrive through `ap`), so only the format string itself is validated.
  void VDebug(const std::string &prefix, const char *pattern, va_list ap)
  __attribute__((__format__(__printf__, 3, 0)));

是因为我个人比较喜欢snprintf这种格式化风格. 但是snprintf这种格式化风格, 有两个缺点:

* 格式化字符串必须得是字面量, 否则不能保证类型安全
* 格式化字符串需要parse一次, 性能可能会比较低下

在一次profiling的过程中, 发现logger库绝大部分耗时都在snprintf中, 所以想要改进这个. 最初的想法是, 通过可变参数模板弄一个类型安全的snprintf.

那么至少要先把泛型T转化成String的功能实现了:

// Public entry point of the formatter.
// Perfect-forwards every argument in `vn` to __Format together with the
// destination `input`, its capacity `max_length`, and a literal 0
// (presumably the number of bytes written so far -- confirm against
// __Format). The result of __Format is returned as an int32_t.
template <typename... Tn>
int32_t Format(char *input, int32_t max_length, Tn &&... vn) {
  const int32_t written =
      __Format(input, max_length, 0, std::forward<Tn>(vn)...);
  return written;
}

然后通过各种特化来实现比如说itoa, dtoa:

// Primary template: declared but not defined here, so every supported type
// needs its own specialization or overload.
// Contract as used by the specializations in this file: write the textual
// form of `value` into `buffer`, which has `left` bytes available, and
// return the text length.
template <typename T>
inline int32_t ToString(T value, char *buffer, int32_t left);

// FORMAT_VALUE(TYPE, FUNC) generates the ToString specialization for TYPE.
// FUNC(value, scratch) is expected to write the text of `value` into a local
// 64-byte scratch array and return its length. When that length fits in
// `left` the text is copied into `buffer` (no terminator is appended);
// otherwise only buffer[0] is set to 0. In both cases the length reported by
// FUNC is returned.
#define FORMAT_VALUE(TYPE, FUNC)                                     \
  template <>                                                        \
  inline int32_t ToString(TYPE value, char *buffer, int32_t left) {  \
    char scratch[64];                                                \
    const int32_t written = FUNC(value, scratch);                    \
    if (written > left) {                                            \
      buffer[0] = 0;                                                 \
    } else {                                                         \
      memcpy(buffer, scratch, written);                              \
    }                                                                \
    return written;                                                  \
  }

// ToString specializations for the built-in arithmetic types. Each line pairs
// a type with the converter that renders it: 32-bit converters for types that
// promote to (unsigned) int, 64-bit converters for the long types, and
// dtoa_milo for floating point.
FORMAT_VALUE(unsigned char, u32toa_sse2);
FORMAT_VALUE(signed short, i32toa_sse2);
FORMAT_VALUE(unsigned short, u32toa_sse2);
FORMAT_VALUE(signed int, i32toa_sse2);
FORMAT_VALUE(unsigned int, u32toa_sse2);
FORMAT_VALUE(signed long, i64toa_sse2);
FORMAT_VALUE(unsigned long, u64toa_sse2);
FORMAT_VALUE(signed long long, i64toa_sse2);
// Added: without this, `unsigned long long` (which is what uint64_t is on
// platforms where `long` is not the 64-bit type) only matched the undefined
// primary template.
FORMAT_VALUE(unsigned long long, u64toa_sse2);
FORMAT_VALUE(float, dtoa_milo);
FORMAT_VALUE(double, dtoa_milo);

其中u32toa_sse2、dtoa_milo等函数, 直接采用了miloyip的实现. num2a代码里面保留了原先的License.

然后再加上对String的特化, 这基本上就要告一段落了. 可是, 既然是格式化, 那么就要有格式, 是不是?! 什么:

* 对齐
* 宽度
* 大小写
* HEX

都得支持吧!

所以加入了一个AlignValue的结构体, 准备特化用来实现对齐这些需求:

// Bit flags carried in AlignValue::align.
// The two low bits select the alignment direction (extract them with
// kAlignMask); the remaining bits are independent option flags.
enum {
  kAlignLeft = 0x01,
  kAlignRight = 0x02,
  kAlignCenter = kAlignLeft | kAlignRight,  // both direction bits set
  kAlignMask = 0x03,                        // isolates the direction bits
  kAlignHex = 0x04,
  kAlignUpper = 0x08,
};
// A value bundled with the layout options used when it is formatted.
// This is a plain aggregate; the field order matters because it is filled
// positionally with brace initialization.
template <typename T>
struct AlignValue {
  const T &value;  // the value to format; held by reference, not copied, so
                   // the referenced object must outlive this struct
  uint8_t width;   // minimum field width, in characters
  int8_t pad;      // fill character used when the text is shorter than width
  int8_t align;    // bitwise OR of the kAlign* flags
};

// Convenience factory for AlignValue.
//   align - bitwise OR of kAlign* flags
//   value - the value to format; it is referenced, not copied, so it must
//           stay alive for as long as the returned object is used
//   width - minimum field width
//   pad   - fill character, '0' by default
template<typename T>
inline AlignValue<T> Align(int8_t align, T &&value, uint8_t width, int8_t pad = '0') {
  AlignValue<T> wrapped{std::forward<T>(value), width, pad, align};
  return wrapped;
}

// Formats an AlignValue: renders align.value through ToHex, optionally
// upper-cases the digits (kAlignUpper), then pads the text with align.pad up
// to align.width according to the direction bits in align.align.
//
//   buffer - destination; must hold the padded text plus a terminating NUL,
//            which is written at buffer[<returned length>]
//   left   - number of text bytes available in buffer
//
// Returns the length of the padded text, or -1 when ToHex fails or the
// padded text does not fit in `left`. On failure `buffer` may already hold
// the unpadded digits.
//
// When no direction bit is set the text is right-justified (the printf
// default). Previously that case wrote no padding at all, so the returned
// length covered uninitialized bytes.
//
// NOTE(review): kAlignHex is never consulted here; the value always goes
// through ToHex.
template <typename T>
inline int32_t ToString(AlignValue<T> align, char *buffer, int32_t left) {
  const int32_t length = ToHex(align.value, buffer, left);
  if (length < 0) return -1;

  const int32_t max_length = length < align.width ? align.width : length;
  if (left < max_length) return -1;

  if (align.align & kAlignUpper) {
    ToUpper(buffer, length);
  }

  const int32_t space_length = max_length - length;
  if (space_length > 0) {
    const int32_t direction = align.align & kAlignMask;
    if (direction == kAlignLeft) {
      // Text stays at the front, fill goes after it.
      memset(buffer + length, align.pad, space_length);
    } else if (direction == kAlignCenter) {
      // Odd leftovers go to the right-hand side.
      const int32_t lead = space_length / 2;
      memmove(buffer + lead, buffer, length);
      memset(buffer, align.pad, lead);
      memset(buffer + lead + length, align.pad, space_length - lead);
    } else {
      // kAlignRight, or no direction given: shift the text to the end and
      // fill in front of it.
      memmove(buffer + space_length, buffer, length);
      memset(buffer, align.pad, space_length);
    }
  }
  buffer[max_length] = '\0';
  return max_length;
}

为了防止某些人手贱, 把其他类型传入到ToHex里面去, 所以ToHex模板也写了几个特化, 把整数类型做了HEX, 其他类型导入原先的ToString里面去了.

这边是ToHex, 和ToUpper的实现:

// Writes `v` as lower-case hexadecimal, without leading zeros and without a
// terminating NUL, into `buffer`.
//
//   v      - integer value; negative values are rendered as the two's
//            complement bit pattern of T (e.g. int -1 -> "ffffffff")
//   buffer - destination
//   left   - number of bytes available in buffer
//
// Returns the number of characters written, or -1 when the text does not fit
// in `left` (buffer is untouched in that case).
//
// Zero is rendered as "0"; previously it produced an empty string.
template <typename T>
inline int32_t ToHex(T v, char *buffer, int32_t left) {
  using Unsigned = typename std::make_unsigned<T>::type;
  const Unsigned value = static_cast<Unsigned>(v);

  char digits[sizeof(T) * 2];  // two hex digits per byte
  int32_t length = 0;
  // Walk the nibbles from most to least significant, skipping leading zeros.
  for (int32_t shift = static_cast<int32_t>(sizeof(T)) * 8 - 4; shift >= 0;
       shift -= 4) {
    const unsigned nibble = static_cast<unsigned>(value >> shift) & 0xFu;
    if (length > 0 || nibble != 0) {
      digits[length++] = "0123456789abcdef"[nibble];
    }
  }
  if (length == 0) {
    digits[length++] = '0';  // every nibble was zero
  }

  if (left < length) return -1;
  memcpy(buffer, digits, length);
  return length;
}

// Upper-cases the ASCII letters in buffer[0, len) in place and writes a NUL
// terminator at buffer[len], so the buffer must hold at least len + 1 bytes.
// Bytes outside 'a'..'z' (including bytes with the high bit set) are left
// untouched. A negative `len` is ignored.
//
// Changes from the previous version:
//  - words are loaded/stored with memcpy instead of through a
//    reinterpret_cast'ed uint32_t*, which was an unaligned, aliasing-violating
//    access on a char buffer;
//  - the tail bytes use the same ASCII-only rule as the word path instead of
//    toupper(), which is locale-dependent and undefined for negative char
//    values;
//  - a negative len can no longer write before the start of the buffer.
inline void ToUpper(char *buffer, int32_t len){
  if (len < 0) return;

  int32_t pos = 0;
  // Four bytes at a time: compute, per byte, a 0x20 mask that is set only for
  // 'a'..'z', then subtract it to clear the lower-case bit.
  for (; len - pos >= 4; pos += 4) {
    uint32_t word;
    memcpy(&word, buffer + pos, sizeof(word));
    uint32_t mask = (0x7f7f7f7fu & word) + 0x05050505u;  // > 'z' wraps past 0x7f
    mask = (0x7f7f7f7fu & mask) + 0x1a1a1a1au;           // bit 7 set iff 'a'..'z'
    mask = ((mask & ~word) >> 2) & 0x20202020u;          // drop non-ASCII, move to bit 5
    word -= mask;
    memcpy(buffer + pos, &word, sizeof(word));
  }
  // Remaining 0-3 bytes.
  for (; pos < len; ++pos) {
    const char c = buffer[pos];
    if (c >= 'a' && c <= 'z') {
      buffer[pos] = static_cast<char>(c - ('a' - 'A'));
    }
  }
  buffer[pos] = '\0';
}

ostringstream

到这里为止, 基本上就可以实现一个功能和ostringstream类似的Format库.

snprintf

我比较懒, 现在的Format库可以用了, 就不太想去搞snprintf了….肿么办

PS: 最近入手一台新的电脑, E3 1230V3, 16G内存, 1T(64M缓存)的硬盘, 跑了一下我的log库, 每秒可以输出700W行log, 表明format库性能还是杠杠滴:-D


下一篇 vim-plugin

Comments