sitemap-librarian.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. <?php
  2. /**
  3. * Sitemaps are stored in the database as posts, one post type per sitemap type. This class
  4. * provides a small API for storing and retrieving sitemap data so we can
  5. * avoid lots of explicit SQL juggling while building sitemaps. This file
  6. * also includes the SQL used to retrieve posts and images to be included
  7. * in the sitemaps.
  8. *
  9. * @since 4.8.0
  10. * @package Jetpack
  11. */
  12. require_once dirname( __FILE__ ) . '/sitemap-constants.php';
  13. /**
  14. * This object handles any database interaction required
  15. * for sitemap generation.
  16. *
  17. * @since 4.8.0
  18. */
  19. class Jetpack_Sitemap_Librarian {
  /**
   * Look up a single stored sitemap by name and type.
   *
   * The lookup goes through get_posts(), asking for at most one post in
   * 'draft' status whose title is $name and whose post type is $type. The
   * post content is base64-decoded before it is handed back.
   *
   * @access public
   * @since 4.8.0
   *
   * @param string $name Name of the sitemap to be retrieved.
   * @param string $type Type of the sitemap to be retrieved.
   *
   * @return array|null Null if no such sitemap exists, otherwise {
   *   @type int    $id        ID of the post holding the sitemap.
   *   @type string $timestamp The post's post_date value.
   *   @type string $name      The post's title.
   *   @type string $type      The post's post type.
   *   @type string $text      The base64-decoded post content.
   * }
   */
  public function read_sitemap_data( $name, $type ) {
    $lookup = array(
      'numberposts' => 1,
      'title' => $name,
      'post_type' => $type,
      'post_status' => 'draft',
    );

    $matches = get_posts( $lookup );
    $sitemap_post = array_shift( $matches );

    // array_shift() yields null when the result list is empty.
    if ( null === $sitemap_post ) {
      return null;
    }

    $sitemap = array();
    $sitemap['id'] = $sitemap_post->ID;
    $sitemap['timestamp'] = $sitemap_post->post_date;
    $sitemap['name'] = $sitemap_post->post_title;
    $sitemap['type'] = $sitemap_post->post_type;
    $sitemap['text'] = base64_decode( $sitemap_post->post_content );

    return $sitemap;
  }
  /**
   * Store a sitemap of given type and index in the database.
   *
   * The sitemap name is derived with jp_sitemap_filename( $type, $index ).
   * The contents are base64-encoded and the timestamp is re-encoded as
   * 'Y-m-d H:i:s' before being passed to wp_insert_post().
   *
   * If a sitemap with that type and name does not exist, it is created.
   * If one does exist, its post ID is passed along so the same post is
   * written again.
   *
   * NOTE(review): no post_status is passed to wp_insert_post(); this
   * presumably relies on the WordPress default matching the 'draft'
   * status that read_sitemap_data() filters on -- confirm.
   *
   * @access public
   * @since 4.8.0
   *
   * @param string $index     Index of the sitemap to be stored.
   * @param string $type      Type of the sitemap to be stored.
   * @param string $contents  Contents of the sitemap to be stored.
   * @param string $timestamp Timestamp of the sitemap to be stored, in 'YYYY-MM-DD hh:mm:ss' format.
   */
  public function store_sitemap_data( $index, $type, $contents, $timestamp ) {
    $name = jp_sitemap_filename( $type, $index );
    $existing = $this->read_sitemap_data( $name, $type );

    // Fields shared by the "create" and the "update" case.
    $post_fields = array(
      'post_title' => $name,
      'post_content' => base64_encode( $contents ),
      'post_type' => $type,
      'post_date' => date( 'Y-m-d H:i:s', strtotime( $timestamp ) ),
    );

    if ( null !== $existing ) {
      // Post does exist: address it by ID.
      $post_fields = array( 'ID' => $existing['id'] ) + $post_fields;
    }

    wp_insert_post( $post_fields );
  }
  /**
   * Delete a sitemap by name and type.
   *
   * The sitemap is looked up with read_sitemap_data() and, when found,
   * removed with wp_delete_post(). The return value reflects whether the
   * deletion actually succeeded rather than merely whether a matching
   * sitemap was found.
   *
   * @access public
   * @since 4.8.0
   *
   * @param string $name Row name.
   * @param string $type Row type.
   *
   * @return bool 'true' if a row was deleted, 'false' otherwise.
   */
  public function delete_sitemap_data( $name, $type ) {
    $the_post = $this->read_sitemap_data( $name, $type );

    if ( null === $the_post ) {
      return false;
    }

    // wp_delete_post() returns the post data on success and false/null on
    // failure; only report success when something was really deleted.
    $deleted = wp_delete_post( $the_post['id'] );

    return ! empty( $deleted );
  }
  /**
   * Retrieve the contents of a sitemap with given name and type.
   *
   * Returns the 'text' entry of the record produced by
   * read_sitemap_data(), or the empty string if no such sitemap exists.
   *
   * @access public
   * @since 4.8.0
   *
   * @param string $name Row name.
   * @param string $type Row type.
   *
   * @return string Text of the specified sitemap, or the empty string.
   */
  public function get_sitemap_text( $name, $type ) {
    $sitemap = $this->read_sitemap_data( $name, $type );

    return ( null === $sitemap ) ? '' : $sitemap['text'];
  }
  /**
   * Delete the numbered sitemaps that follow a given position.
   *
   * Starting at $position + 1, each sitemap name is built with
   * jp_sitemap_filename() and handed to delete_sitemap_data(). The walk
   * stops at the first position for which delete_sitemap_data() does not
   * return true.
   *
   * @access public
   * @since 4.8.0
   *
   * @param int    $position Number before the first sitemap to be deleted.
   * @param string $type     Sitemap type.
   */
  public function delete_numbered_sitemap_rows_after( $position, $type ) {
    $current = $position + 1;

    while ( true === $this->delete_sitemap_data( jp_sitemap_filename( $type, $current ), $type ) ) {
      $current = $current + 1;
    }
  }
  /**
   * Deletes all stored sitemap data.
   *
   * Calls delete_sitemap_type_data() once for each of the master, page,
   * image and video sitemap types and their index types, in that order.
   *
   * @access public
   * @since 4.8.0
   */
  public function delete_all_stored_sitemap_data() {
    $sitemap_types = array(
      JP_MASTER_SITEMAP_TYPE,
      JP_PAGE_SITEMAP_TYPE,
      JP_PAGE_SITEMAP_INDEX_TYPE,
      JP_IMAGE_SITEMAP_TYPE,
      JP_IMAGE_SITEMAP_INDEX_TYPE,
      JP_VIDEO_SITEMAP_TYPE,
      JP_VIDEO_SITEMAP_INDEX_TYPE,
    );

    foreach ( $sitemap_types as $sitemap_type ) {
      $this->delete_sitemap_type_data( $sitemap_type );
    }
  }
  /**
   * Deletes all sitemap data of a specific type.
   *
   * Collects the IDs of every 'draft' post of the given post type and
   * passes each one to wp_trash_post().
   *
   * @access protected
   * @since 5.3.0
   *
   * @param string $type Sitemap type (post type) whose stored data is removed.
   */
  protected function delete_sitemap_type_data( $type ) {
    $ids = get_posts( array(
      'post_type' => $type,
      'post_status' => 'draft',
      'fields' => 'ids',
      // get_posts() returns only 5 posts unless told otherwise; ask for
      // all of them so that no sitemap of this type is left behind.
      'numberposts' => -1,
    ) );

    foreach ( $ids as $id ) {
      wp_trash_post( $id );
    }
  }
  /**
   * Retrieve an array of sitemap rows (of a given type) sorted by ID.
   *
   * Selects, in ascending ID order, at most $num_posts rows of the posts
   * table that have the given post type, are in 'draft' status and whose
   * ID is strictly greater than $from_id.
   *
   * @access public
   * @since 4.8.0
   *
   * @param string $type      Type of the sitemap rows to retrieve.
   * @param int    $from_id   Greatest lower bound of retrieved sitemap post IDs.
   * @param int    $num_posts Largest number of sitemap posts to retrieve.
   *
   * @return array The sitemaps, as an array of associative arrays.
   */
  public function query_sitemaps_after_id( $type, $from_id, $num_posts ) {
    global $wpdb;

    $sitemap_status = 'draft';

    $sql = $wpdb->prepare(
      "SELECT *
        FROM $wpdb->posts
        WHERE post_type=%s
          AND post_status=%s
          AND ID>%d
        ORDER BY ID ASC
        LIMIT %d;",
      $type,
      $sitemap_status,
      $from_id,
      $num_posts
    );

    // Rows are requested as associative arrays.
    return $wpdb->get_results( $sql, ARRAY_A ); // WPCS: db call ok; no-cache ok.
  }
  /**
   * Retrieve an array of posts sorted by ID.
   *
   * More precisely, returns the smallest $num_posts published posts
   * (measured by ID) which are larger than $from_id and whose post type
   * is listed in the 'jetpack_sitemap_post_types' option (default:
   * 'page' and 'post').
   *
   * @access public
   * @since 4.8.0
   *
   * @param int $from_id   Greatest lower bound of retrieved post IDs.
   * @param int $num_posts Largest number of posts to retrieve.
   *
   * @return array The posts. Empty when no post types are configured.
   */
  public function query_posts_after_id( $from_id, $num_posts ) {
    global $wpdb;

    // Get the list of post types to include. Normalise to a plain list so
    // that a scalar option value is handled the same way as an array.
    $post_types = array_values(
      (array) Jetpack_Options::get_option_and_ensure_autoload(
        'jetpack_sitemap_post_types',
        array( 'page', 'post' )
      )
    );

    // "IN ()" is not valid SQL; with nothing to look for there is nothing to return.
    if ( empty( $post_types ) ) {
      return array();
    }

    // One %s placeholder per post type, so every value is escaped by a
    // single prepare() call instead of nesting prepared fragments.
    $placeholders = implode( ',', array_fill( 0, count( $post_types ), '%s' ) );
    $query_args = array_merge( $post_types, array( $from_id, $num_posts ) );

    return $wpdb->get_results(
      $wpdb->prepare(
        "SELECT *
          FROM $wpdb->posts
          WHERE post_status='publish'
            AND post_type IN ($placeholders)
            AND ID>%d
          ORDER BY ID ASC
          LIMIT %d;",
        $query_args
      )
    ); // WPCS: db call ok; no-cache ok.
  }
  /**
   * Get the most recent timestamp among approved comments for the given post_id.
   *
   * Only regular comments are considered. These are stored with a
   * comment_type of either '' (older rows) or 'comment' (the default
   * since WordPress 5.5), so both values are matched.
   *
   * @access public
   * @since 4.8.0
   *
   * @param int $post_id Post identifier.
   *
   * @return string|null The largest comment_date_gmt value among the approved comments on the given post, or null if no such comments exist.
   */
  public function query_latest_approved_comment_time_on_post( $post_id ) {
    global $wpdb;

    return $wpdb->get_var(
      $wpdb->prepare(
        "SELECT MAX(comment_date_gmt)
          FROM $wpdb->comments
          WHERE comment_post_ID = %d AND comment_approved = '1' AND comment_type IN ('', 'comment')",
        $post_id
      )
    );
  }
  /**
   * Retrieve an array of image posts sorted by ID.
   *
   * Selects, in ascending ID order, at most $num_posts attachment rows
   * whose MIME type starts with 'image/' and whose ID is strictly
   * greater than $from_id.
   *
   * @access public
   * @since 4.8.0
   *
   * @param int $from_id   Greatest lower bound of retrieved image post IDs.
   * @param int $num_posts Largest number of image posts to retrieve.
   *
   * @return array The posts.
   */
  public function query_images_after_id( $from_id, $num_posts ) {
    global $wpdb;

    // LIKE pattern matching every image MIME type.
    $mime_pattern = 'image/%';

    $sql = $wpdb->prepare(
      "SELECT *
        FROM $wpdb->posts
        WHERE post_type='attachment'
          AND post_mime_type LIKE %s
          AND ID>%d
        ORDER BY ID ASC
        LIMIT %d;",
      $mime_pattern,
      $from_id,
      $num_posts
    );

    return $wpdb->get_results( $sql ); // WPCS: db call ok; no-cache ok.
  }
  /**
   * Retrieve an array of video posts sorted by ID.
   *
   * Selects, in ascending ID order, at most $num_posts attachment rows
   * whose MIME type starts with 'video/' and whose ID is strictly
   * greater than $from_id.
   *
   * @access public
   * @since 4.8.0
   *
   * @param int $from_id   Greatest lower bound of retrieved video post IDs.
   * @param int $num_posts Largest number of video posts to retrieve.
   *
   * @return array The posts.
   */
  public function query_videos_after_id( $from_id, $num_posts ) {
    global $wpdb;

    // LIKE pattern matching every video MIME type.
    $mime_pattern = 'video/%';

    $sql = $wpdb->prepare(
      "SELECT *
        FROM $wpdb->posts
        WHERE post_type='attachment'
          AND post_mime_type LIKE %s
          AND ID>%d
        ORDER BY ID ASC
        LIMIT %d;",
      $mime_pattern,
      $from_id,
      $num_posts
    );

    return $wpdb->get_results( $sql ); // WPCS: db call ok; no-cache ok.
  }
  /**
   * Retrieve an array of published posts from the last 2 days.
   *
   * Posts are restricted to the post types supplied by the
   * 'jetpack_sitemap_news_sitemap_post_types' filter and are returned
   * newest first (by post_date).
   *
   * @access public
   * @since 4.8.0
   *
   * @param int $num_posts Largest number of posts to retrieve.
   *
   * @return array The posts. Empty when the filter leaves no post types.
   */
  public function query_most_recent_posts( $num_posts ) {
    global $wpdb;

    $two_days_ago = date( 'Y-m-d', strtotime( '-2 days' ) );

    /**
     * Filter post types to be included in news sitemap.
     *
     * @module sitemaps
     *
     * @since 3.9.0
     *
     * @param array $post_types Array with post types to include in news sitemap.
     */
    $post_types = apply_filters(
      'jetpack_sitemap_news_sitemap_post_types',
      array( 'page', 'post' )
    );

    // The filter may hand back a scalar or a sparsely keyed array;
    // normalise to a plain list before building the query.
    $post_types = array_values( (array) $post_types );

    // "IN ()" is not valid SQL; with nothing to look for there is nothing to return.
    if ( empty( $post_types ) ) {
      return array();
    }

    // One %s placeholder per post type, so every value is escaped by a
    // single prepare() call instead of nesting prepared fragments.
    $placeholders = implode( ',', array_fill( 0, count( $post_types ), '%s' ) );
    $query_args = array_merge( array( $two_days_ago ), $post_types, array( $num_posts ) );

    return $wpdb->get_results(
      $wpdb->prepare(
        "SELECT *
          FROM $wpdb->posts
          WHERE post_status='publish'
            AND post_date >= %s
            AND post_type IN ($placeholders)
          ORDER BY post_date DESC
          LIMIT %d;",
        $query_args
      )
    ); // WPCS: db call ok; no-cache ok.
  }
  387. }