Decoder.php 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. <?php
  2. namespace MaxMind\Db\Reader;
  3. class Decoder
  4. {
  5. private $fileStream;
  6. private $pointerBase;
  7. // This is only used for unit testing
  8. private $pointerTestHack;
  9. private $switchByteOrder;
  10. private $types = [
  11. 0 => 'extended',
  12. 1 => 'pointer',
  13. 2 => 'utf8_string',
  14. 3 => 'double',
  15. 4 => 'bytes',
  16. 5 => 'uint16',
  17. 6 => 'uint32',
  18. 7 => 'map',
  19. 8 => 'int32',
  20. 9 => 'uint64',
  21. 10 => 'uint128',
  22. 11 => 'array',
  23. 12 => 'container',
  24. 13 => 'end_marker',
  25. 14 => 'boolean',
  26. 15 => 'float',
  27. ];
  28. public function __construct(
  29. $fileStream,
  30. $pointerBase = 0,
  31. $pointerTestHack = false
  32. ) {
  33. $this->fileStream = $fileStream;
  34. $this->pointerBase = $pointerBase;
  35. $this->pointerTestHack = $pointerTestHack;
  36. $this->switchByteOrder = $this->isPlatformLittleEndian();
  37. }
  38. public function decode($offset)
  39. {
  40. list(, $ctrlByte) = unpack(
  41. 'C',
  42. Util::read($this->fileStream, $offset, 1)
  43. );
  44. $offset++;
  45. $type = $this->types[$ctrlByte >> 5];
  46. // Pointers are a special case, we don't read the next $size bytes, we
  47. // use the size to determine the length of the pointer and then follow
  48. // it.
  49. if ($type === 'pointer') {
  50. list($pointer, $offset) = $this->decodePointer($ctrlByte, $offset);
  51. // for unit testing
  52. if ($this->pointerTestHack) {
  53. return [$pointer];
  54. }
  55. list($result) = $this->decode($pointer);
  56. return [$result, $offset];
  57. }
  58. if ($type === 'extended') {
  59. list(, $nextByte) = unpack(
  60. 'C',
  61. Util::read($this->fileStream, $offset, 1)
  62. );
  63. $typeNum = $nextByte + 7;
  64. if ($typeNum < 8) {
  65. throw new InvalidDatabaseException(
  66. 'Something went horribly wrong in the decoder. An extended type '
  67. . 'resolved to a type number < 8 ('
  68. . $this->types[$typeNum]
  69. . ')'
  70. );
  71. }
  72. $type = $this->types[$typeNum];
  73. $offset++;
  74. }
  75. list($size, $offset) = $this->sizeFromCtrlByte($ctrlByte, $offset);
  76. return $this->decodeByType($type, $offset, $size);
  77. }
  78. private function decodeByType($type, $offset, $size)
  79. {
  80. switch ($type) {
  81. case 'map':
  82. return $this->decodeMap($size, $offset);
  83. case 'array':
  84. return $this->decodeArray($size, $offset);
  85. case 'boolean':
  86. return [$this->decodeBoolean($size), $offset];
  87. }
  88. $newOffset = $offset + $size;
  89. $bytes = Util::read($this->fileStream, $offset, $size);
  90. switch ($type) {
  91. case 'utf8_string':
  92. return [$this->decodeString($bytes), $newOffset];
  93. case 'double':
  94. $this->verifySize(8, $size);
  95. return [$this->decodeDouble($bytes), $newOffset];
  96. case 'float':
  97. $this->verifySize(4, $size);
  98. return [$this->decodeFloat($bytes), $newOffset];
  99. case 'bytes':
  100. return [$bytes, $newOffset];
  101. case 'uint16':
  102. case 'uint32':
  103. return [$this->decodeUint($bytes), $newOffset];
  104. case 'int32':
  105. return [$this->decodeInt32($bytes), $newOffset];
  106. case 'uint64':
  107. case 'uint128':
  108. return [$this->decodeBigUint($bytes, $size), $newOffset];
  109. default:
  110. throw new InvalidDatabaseException(
  111. 'Unknown or unexpected type: ' . $type
  112. );
  113. }
  114. }
  115. private function verifySize($expected, $actual)
  116. {
  117. if ($expected !== $actual) {
  118. throw new InvalidDatabaseException(
  119. "The MaxMind DB file's data section contains bad data (unknown data type or corrupt data)"
  120. );
  121. }
  122. }
  123. private function decodeArray($size, $offset)
  124. {
  125. $array = [];
  126. for ($i = 0; $i < $size; $i++) {
  127. list($value, $offset) = $this->decode($offset);
  128. array_push($array, $value);
  129. }
  130. return [$array, $offset];
  131. }
  132. private function decodeBoolean($size)
  133. {
  134. return $size === 0 ? false : true;
  135. }
  136. private function decodeDouble($bits)
  137. {
  138. // XXX - Assumes IEEE 754 double on platform
  139. list(, $double) = unpack('d', $this->maybeSwitchByteOrder($bits));
  140. return $double;
  141. }
  142. private function decodeFloat($bits)
  143. {
  144. // XXX - Assumes IEEE 754 floats on platform
  145. list(, $float) = unpack('f', $this->maybeSwitchByteOrder($bits));
  146. return $float;
  147. }
  148. private function decodeInt32($bytes)
  149. {
  150. $bytes = $this->zeroPadLeft($bytes, 4);
  151. list(, $int) = unpack('l', $this->maybeSwitchByteOrder($bytes));
  152. return $int;
  153. }
  154. private function decodeMap($size, $offset)
  155. {
  156. $map = [];
  157. for ($i = 0; $i < $size; $i++) {
  158. list($key, $offset) = $this->decode($offset);
  159. list($value, $offset) = $this->decode($offset);
  160. $map[$key] = $value;
  161. }
  162. return [$map, $offset];
  163. }
  164. private $pointerValueOffset = [
  165. 1 => 0,
  166. 2 => 2048,
  167. 3 => 526336,
  168. 4 => 0,
  169. ];
  170. private function decodePointer($ctrlByte, $offset)
  171. {
  172. $pointerSize = (($ctrlByte >> 3) & 0x3) + 1;
  173. $buffer = Util::read($this->fileStream, $offset, $pointerSize);
  174. $offset = $offset + $pointerSize;
  175. $packed = $pointerSize === 4
  176. ? $buffer
  177. : (pack('C', $ctrlByte & 0x7)) . $buffer;
  178. $unpacked = $this->decodeUint($packed);
  179. $pointer = $unpacked + $this->pointerBase
  180. + $this->pointerValueOffset[$pointerSize];
  181. return [$pointer, $offset];
  182. }
  183. private function decodeUint($bytes)
  184. {
  185. list(, $int) = unpack('N', $this->zeroPadLeft($bytes, 4));
  186. return $int;
  187. }
  188. private function decodeBigUint($bytes, $byteLength)
  189. {
  190. $maxUintBytes = log(PHP_INT_MAX, 2) / 8;
  191. if ($byteLength === 0) {
  192. return 0;
  193. }
  194. $numberOfLongs = ceil($byteLength / 4);
  195. $paddedLength = $numberOfLongs * 4;
  196. $paddedBytes = $this->zeroPadLeft($bytes, $paddedLength);
  197. $unpacked = array_merge(unpack("N$numberOfLongs", $paddedBytes));
  198. $integer = 0;
  199. // 2^32
  200. $twoTo32 = '4294967296';
  201. foreach ($unpacked as $part) {
  202. // We only use gmp or bcmath if the final value is too big
  203. if ($byteLength <= $maxUintBytes) {
  204. $integer = ($integer << 32) + $part;
  205. } elseif (extension_loaded('gmp')) {
  206. $integer = gmp_strval(gmp_add(gmp_mul($integer, $twoTo32), $part));
  207. } elseif (extension_loaded('bcmath')) {
  208. $integer = bcadd(bcmul($integer, $twoTo32), $part);
  209. } else {
  210. throw new \RuntimeException(
  211. 'The gmp or bcmath extension must be installed to read this database.'
  212. );
  213. }
  214. }
  215. return $integer;
  216. }
  217. private function decodeString($bytes)
  218. {
  219. // XXX - NOOP. As far as I know, the end user has to explicitly set the
  220. // encoding in PHP. Strings are just bytes.
  221. return $bytes;
  222. }
  223. private function sizeFromCtrlByte($ctrlByte, $offset)
  224. {
  225. $size = $ctrlByte & 0x1f;
  226. $bytesToRead = $size < 29 ? 0 : $size - 28;
  227. $bytes = Util::read($this->fileStream, $offset, $bytesToRead);
  228. $decoded = $this->decodeUint($bytes);
  229. if ($size === 29) {
  230. $size = 29 + $decoded;
  231. } elseif ($size === 30) {
  232. $size = 285 + $decoded;
  233. } elseif ($size > 30) {
  234. $size = ($decoded & (0x0FFFFFFF >> (32 - (8 * $bytesToRead))))
  235. + 65821;
  236. }
  237. return [$size, $offset + $bytesToRead];
  238. }
  239. private function zeroPadLeft($content, $desiredLength)
  240. {
  241. return str_pad($content, $desiredLength, "\x00", STR_PAD_LEFT);
  242. }
  243. private function maybeSwitchByteOrder($bytes)
  244. {
  245. return $this->switchByteOrder ? strrev($bytes) : $bytes;
  246. }
  247. private function isPlatformLittleEndian()
  248. {
  249. $testint = 0x00FF;
  250. $packed = pack('S', $testint);
  251. return $testint === current(unpack('v', $packed));
  252. }
  253. }