dtypes.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306
  1. """
  2. Provides functionality for validation of the data-types specified
  3. for odml
  4. """
  5. import sys
# The module's own namespace as a dict. get(), set() and validate() use it to
# look up the "<dtype>_get" / "<dtype>_set" converter functions by name.
self = sys.modules[__name__].__dict__
  7. import datetime
  8. import binascii
  9. import hashlib
  10. from enum import Enum
  11. class DType(str, Enum):
  12. string = 'string'
  13. text = 'text'
  14. int = 'int'
  15. float = 'float'
  16. url = 'url'
  17. datetime = 'datetime'
  18. date = 'date'
  19. time = 'time'
  20. boolean = 'boolean'
  21. person = 'person'
  22. binary = 'binary'
  23. def __str__(self):
  24. return self.name
# Aliases from Python type names to the dtype names used in DType.
_dtype_map = {'str': 'string', 'bool': 'boolean'}
  26. def infer_dtype(value):
  27. dtype = (type(value)).__name__
  28. if dtype in _dtype_map:
  29. dtype = _dtype_map[dtype]
  30. if valid_type(dtype):
  31. if dtype == 'string' and '\n' in value:
  32. dtype = 'text'
  33. return dtype
  34. else:
  35. return None
  36. def valid_type(dtype):
  37. """
  38. checks if *dtype* is a valid type
  39. """
  40. dtype = dtype.lower()
  41. if dtype in _dtype_map:
  42. dtype = _dtype_map[dtype]
  43. if hasattr(DType, dtype):
  44. return True
  45. if dtype is None:
  46. return True
  47. if dtype.endswith("-tuple"):
  48. try:
  49. int(dtype[:-6])
  50. return True
  51. except ValueError:
  52. pass
  53. return False
  54. # TODO also take encoding into account
  55. def validate(string, dtype):
  56. """
  57. checks if:
  58. * *dtype* is a valid type
  59. * *string* is a valid expression of type *dtype*
  60. """
  61. try:
  62. if not valid_type(dtype):
  63. if dtype.endswith("-tuple"):
  64. count = int(dtype[:-6])
  65. #try to parse it
  66. tuple_get(string, count=count)
  67. return True
  68. #try to parse it
  69. self.get(dtype + "_get", str_get)(string)
  70. else:
  71. return False
  72. except RuntimeError:
  73. #any error, this type ain't valid
  74. return False
  75. def get(string, dtype=None, encoding=None):
  76. """
  77. convert *string* to the corresponding *dtype*
  78. """
  79. if not dtype: return str_get(string)
  80. if dtype.endswith("-tuple"): # special case, as the count-number is included in the type-name
  81. return tuple_get(string)
  82. if dtype == "binary":
  83. return binary_get(string, encoding)
  84. return self.get(dtype + "_get", str_get)(string)
  85. def set(value, dtype=None, encoding=None):
  86. """
  87. serialize a *value* of type *dtype* to a unicode string
  88. """
  89. if not dtype:
  90. return str_set(value)
  91. if dtype.endswith("-tuple"):
  92. return tuple_set(value)
  93. if dtype == "binary":
  94. return binary_set(value, encoding)
  95. if sys.version_info > (3, 0):
  96. if isinstance(value, str):
  97. return str_set(value)
  98. else:
  99. if type(value) in (str, unicode):
  100. return str_set(value)
  101. return self.get(dtype + "_set", str_set)(value)
  102. def int_get(string):
  103. if not string: return 0
  104. try:
  105. return int(string)
  106. except ValueError:
  107. # convert to float first and then cast to int
  108. return int(float(string))
  109. def float_get(string):
  110. if not string: return 0.0
  111. return float(string)
  112. def str_get(string):
  113. if sys.version_info < (3, 0):
  114. return unicode(string)
  115. return str(string)
  116. def str_set(value):
  117. try:
  118. if sys.version_info < (3, 0):
  119. return unicode(value)
  120. else:
  121. return str(value)
  122. except Exception as ex:
  123. fail = ex
  124. raise fail
  125. def time_get(string):
  126. if not string: return None
  127. if type(string) is datetime.time:
  128. return string.strftime('%H:%M:%S').time()
  129. else:
  130. return datetime.datetime.strptime(string, '%H:%M:%S').time()
  131. def time_set(value):
  132. if not value: return None
  133. if type(value) is datetime.time:
  134. return value.strftime("%H:%M:%S")
  135. return value.isoformat()
  136. def date_get(string):
  137. if not string: return None
  138. if type(string) is datetime.date:
  139. return datetime.datetime.strptime(string.isoformat(), '%Y-%m-%d').date()
  140. else:
  141. return datetime.datetime.strptime(string, '%Y-%m-%d').date()
# Dates are serialized by the same function as times; a date is not a
# datetime.time, so it goes through time_set's isoformat() branch.
date_set = time_set
  143. def datetime_get(string):
  144. if not string: return None
  145. if type(string) is datetime.datetime:
  146. return string.strftime('%Y-%m-%d %H:%M:%S')
  147. else:
  148. return datetime.datetime.strptime(string, '%Y-%m-%d %H:%M:%S')
  149. def datetime_set(value):
  150. if not value: return None
  151. if type(value) is datetime.datetime:
  152. return value.strftime('%Y-%m-%d %H:%M:%S')
  153. else:
  154. return datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S')
  155. def boolean_get(string):
  156. if not string: return None
  157. if type(string) is bool:
  158. string = str(string)
  159. string = string.lower()
  160. truth = ["true", "t", "1"] # be kind, spec only accepts True / False
  161. if string in truth: return True
  162. false = ["false", "f", "0"]
  163. if string in false: return False
  164. raise ValueError("Cannot interpret '%s' as boolean" % string)
  165. def boolean_set(value):
  166. if value is None: return None
  167. return str(value)
  168. def tuple_get(string, count=None):
  169. """
  170. parse a tuple string like "(1024;768)" and return strings of the elements
  171. """
  172. if not string: return None
  173. string = string.strip()
  174. assert string.startswith("(") and string.endswith(")")
  175. string = string[1:-1]
  176. res = string.split(";")
  177. if count is not None: # be strict
  178. assert len(res) == count
  179. return res
  180. def tuple_set(value):
  181. if not value: return None
  182. return "(%s)" % ";".join(value)
  183. ###############################################################################
  184. # Binary Encoding Stuff
  185. ###############################################################################
  186. class Encoder(object):
  187. def __init__(self, encode, decode):
  188. self._encode = encode
  189. self._decode = decode
  190. def encode(self, data):
  191. if sys.version_info > (3, 0) and isinstance(data, str):
  192. data = str.encode(data)
  193. return self._encode(data)
  194. def decode(self, string):
  195. return self._decode(string)
  196. encodings = {
  197. 'base64': Encoder(lambda x: binascii.b2a_base64(x).strip(), binascii.a2b_base64),
  198. 'quoted-printable': Encoder(binascii.b2a_qp, binascii.a2b_qp),
  199. 'hexadecimal': Encoder(binascii.b2a_hex, binascii.a2b_hex),
  200. None: Encoder(lambda x: x, lambda x: x), #identity encoder
  201. }
  202. def valid_encoding(encoding):
  203. return encoding in encodings
  204. def binary_get(string, encoding=None):
  205. "binary decode the *string* according to *encoding*"
  206. if not string: return None
  207. return encodings[encoding].decode(string)
  208. def binary_set(value, encoding=None):
  209. "binary encode the *value* according to *encoding*"
  210. if not value: return None
  211. return encodings[encoding].encode(value)
  212. def calculate_crc32_checksum(data):
  213. if sys.version_info < (3, 0):
  214. return "%08x" % (binascii.crc32(data) & 0xffffffff)
  215. else:
  216. if isinstance(data, str):
  217. data = str.encode(data)
  218. return "%08x" % (binascii.crc32(data) & 0xffffffff)
  219. checksums = {
  220. 'crc32': calculate_crc32_checksum,
  221. }
  222. # allow to use any available algorithm
  223. if sys.version_info > (3, 0):
  224. for algo in hashlib.algorithms_guaranteed:
  225. checksums[algo] = lambda data, func=getattr(hashlib, algo): func(data).hexdigest()
  226. elif not sys.version_info < (2, 7):
  227. for algo in hashlib.algorithms:
  228. checksums[algo] = lambda data, func=getattr(hashlib, algo): func(data).hexdigest()
  229. def valid_checksum_type(checksum_type):
  230. return checksum_type in checksums
  231. def calculate_checksum(data, checksum_type):
  232. if data is None: data = ''
  233. if isinstance(data, str):
  234. data = str.encode(data)
  235. return checksums[checksum_type](data)