class-link-extractor.php 945 B

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\Admin\Links
  6. */
  7. /**
  8. * Represents the link extractor.
  9. */
  10. class WPSEO_Link_Extractor {
  11. /** @var string */
  12. protected $content;
  13. /**
  14. * Sets the content.
  15. *
  16. * @param string $content The content to extract the links from.
  17. */
  18. public function __construct( $content ) {
  19. $this->content = $content;
  20. }
  21. /**
  22. * Extracts the hrefs from the content and returns them as an array.
  23. *
  24. * @return array All the extracted links
  25. */
  26. public function extract() {
  27. $links = array();
  28. if ( strpos( $this->content, 'href' ) === false ) {
  29. return $links;
  30. }
  31. $regexp = '<a\s[^>]*href=("??)([^" >]*?)\\1[^>]*>';
  32. // Used modifiers iU to match case insensitive and make greedy quantifiers lazy.
  33. if ( preg_match_all( "/$regexp/iU", $this->content, $matches, PREG_SET_ORDER ) ) {
  34. foreach ( $matches as $match ) {
  35. $links[] = trim( $match[2], "'" );
  36. }
  37. }
  38. return $links;
  39. }
  40. }