Skip to content

Commit 8c25b57

Browse files
committed
Utilize latest ext-dom
1 parent 251e970 commit 8c25b57

File tree

9 files changed

+59
-70
lines changed

9 files changed

+59
-70
lines changed

.github/workflows/ci.yml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,9 @@ jobs:
4343
fail-fast: false
4444
matrix:
4545
php-version:
46-
- "8.1"
47-
- "8.2"
48-
- "8.3"
4946
- "8.4"
5047
deps:
5148
- "highest"
52-
include:
53-
- php-version: "8.1"
54-
deps: "lowest"
5549

5650
steps:
5751
- name: Checkout

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ composer require brick/structured-data
2323

2424
### Requirements
2525

26-
This library requires PHP 8.1 or later. It makes use of the following extensions:
26+
This library requires PHP 8.4 or later. It makes use of the following extensions:
2727

2828
- [dom](https://www.php.net/manual/en/book.dom.php)
2929
- [json](https://www.php.net/manual/en/book.json.php)
@@ -55,13 +55,13 @@ interface Brick\StructuredData\Reader
5555
/**
5656
* Reads the items contained in the given document.
5757
*
58-
* @param DOMDocument $document The DOM document to read.
59-
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
60-
* URLs in property values. No attempt will be performed to connect to this URL.
58+
* @param HTMLDocument $document The DOM document to read.
59+
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
60+
* URLs in property values. No attempt will be performed to connect to this URL.
6161
*
6262
* @return Item[] The top-level items.
6363
*/
64-
public function read(DOMDocument $document, string $url) : array;
64+
public function read(HTMLDocument $document, string $url) : array;
6565
}
6666
```
6767

@@ -93,7 +93,7 @@ use Brick\StructuredData\Item;
9393
$microdataReader = new MicrodataReader();
9494

9595
// Wrap into HTMLReader to be able to read HTML strings or files directly,
96-
// i.e. without manually converting them to DOMDocument instances first
96+
// i.e. without manually converting them to HTMLDocument instances first
9797
$htmlReader = new HTMLReader($microdataReader);
9898

9999
// Replace this URL with that of a website you know is using Microdata

composer.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
],
1212
"license": "MIT",
1313
"require": {
14-
"php": "^8.1",
14+
"php": "^8.4",
1515
"ext-dom": "*",
1616
"ext-json": "*",
1717
"ext-libxml": "*",

src/DOMBuilder.php

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,32 +4,27 @@
44

55
namespace Brick\StructuredData;
66

7-
use DOMDocument;
7+
use DOM\HTMLDocument;
88

9+
use const DOM\HTML_NO_DEFAULT_NS;
910
use const LIBXML_NOERROR;
1011
use const LIBXML_NOWARNING;
1112

1213
final class DOMBuilder
1314
{
1415
/**
15-
* Builds a DOMDocument from an HTML string.
16+
* Builds an HTMLDocument from an HTML string.
1617
*/
17-
public static function fromHTML(string $html): DOMDocument
18+
public static function fromHTML(string $html) : HTMLDocument
1819
{
19-
$document = new DOMDocument();
20-
$document->loadHTML($html, LIBXML_NOWARNING | LIBXML_NOERROR);
21-
22-
return $document;
20+
return HTMLDocument::createFromString($html, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
2321
}
2422

2523
/**
26-
* Builds a DOMDocument from an HTML file.
24+
* Builds an HTMLDocument from an HTML file.
2725
*/
28-
public static function fromHTMLFile(string $file): DOMDocument
26+
public static function fromHTMLFile(string $file) : HTMLDocument
2927
{
30-
$document = new DOMDocument();
31-
$document->loadHTMLFile($file, LIBXML_NOWARNING | LIBXML_NOERROR);
32-
33-
return $document;
28+
return HTMLDocument::createFromFile($file, LIBXML_NOERROR | HTML_NO_DEFAULT_NS);
3429
}
3530
}

src/Reader.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
namespace Brick\StructuredData;
66

7-
use DOMDocument;
7+
use DOM\HTMLDocument;
88

99
/**
1010
* Common interface for readers of each format: Microdata, RDFa Lite, JSON-LD.
@@ -14,11 +14,11 @@ interface Reader
1414
/**
1515
* Reads the items contained in the given document.
1616
*
17-
* @param DOMDocument $document The DOM document to read.
18-
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
17+
* @param HTMLDocument $document The HTMLDocument to read.
18+
* @param string $url The URL the document was retrieved from. This will be used only to resolve relative
1919
* URLs in property values. The implementation must not attempt to connect to this URL.
2020
*
2121
* @return Item[] The top-level items.
2222
*/
23-
public function read(DOMDocument $document, string $url): array;
23+
public function read(HTMLDocument $document, string $url) : array;
2424
}

src/Reader/JsonLdReader.php

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66

77
use Brick\StructuredData\Item;
88
use Brick\StructuredData\Reader;
9-
use DOMDocument;
10-
use DOMNode;
11-
use DOMXPath;
9+
use DOM\HTMLDocument;
10+
use DOM\Node;
11+
use DOM\XPath;
1212
use Override;
1313
use Sabre\Uri\InvalidUriException;
1414
use stdClass;
@@ -66,9 +66,9 @@ public function __construct(array $iriProperties = [])
6666
}
6767

6868
#[Override]
69-
public function read(DOMDocument $document, string $url): array
69+
public function read(HTMLDocument $document, string $url): array
7070
{
71-
$xpath = new DOMXPath($document);
71+
$xpath = new XPath($document);
7272

7373
$nodes = $xpath->query('//script[@type="application/ld+json"]');
7474
$nodes = iterator_to_array($nodes);
@@ -78,7 +78,7 @@ public function read(DOMDocument $document, string $url): array
7878
}
7979

8080
$items = array_map(
81-
fn (DOMNode $node) => $this->readJson($node->textContent, $url),
81+
fn (Node $node) => $this->readJson($node->textContent, $url),
8282
$nodes,
8383
);
8484

src/Reader/MicrodataReader.php

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66

77
use Brick\StructuredData\Item;
88
use Brick\StructuredData\Reader;
9-
use DOMDocument;
10-
use DOMNode;
11-
use DOMXPath;
9+
use DOM\HTMLDocument;
10+
use DOM\Node;
11+
use DOM\XPath;
1212
use Override;
1313
use Sabre\Uri\InvalidUriException;
1414

@@ -36,9 +36,9 @@
3636
final class MicrodataReader implements Reader
3737
{
3838
#[Override]
39-
public function read(DOMDocument $document, string $url): array
39+
public function read(HTMLDocument $document, string $url): array
4040
{
41-
$xpath = new DOMXPath($document);
41+
$xpath = new XPath($document);
4242

4343
/**
4444
* An item is a top-level Microdata item if its element does not have an itemprop attribute.
@@ -49,19 +49,19 @@ public function read(DOMDocument $document, string $url): array
4949
$nodes = iterator_to_array($nodes);
5050

5151
return array_map(
52-
fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url),
52+
fn (Node $node) => $this->nodeToItem($node, $xpath, $url),
5353
$nodes,
5454
);
5555
}
5656

5757
/**
58-
* Extracts information from a DOMNode into an Item.
58+
* Extracts information from a Node into an Item.
5959
*
60-
* @param DOMNode $node A DOMNode representing an element with the itemscope attribute.
61-
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
60+
* @param Node $node A Node representing an element with the itemscope attribute.
61+
* @param XPath $xpath An XPath object created from the node's document element.
6262
* @param string $url The URL the document was retrieved from, for relative URL resolution.
6363
*/
64-
private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
64+
private function nodeToItem(Node $node, XPath $xpath, string $url): Item
6565
{
6666
$itemid = $node->attributes->getNamedItem('itemid');
6767

@@ -106,7 +106,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
106106

107107
// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
108108
// See: https://stackoverflow.com/q/26365495/759866
109-
$itemprops = array_filter($itemprops, function (DOMNode $itemprop) use ($node, $xpath) {
109+
$itemprops = array_filter($itemprops, function (Node $itemprop) use ($node, $xpath) {
110110
for (; ;) {
111111
$itemprop = $itemprop->parentNode;
112112

@@ -122,7 +122,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
122122

123123
$vocabularyIdentifier = $this->getVocabularyIdentifier($types);
124124

125-
/** @var DOMNode[] $itemprops */
125+
/** @var Node[] $itemprops */
126126
foreach ($itemprops as $itemprop) {
127127
/**
128128
* An element introducing a property can introduce multiple properties at once, to avoid duplication when
@@ -159,11 +159,11 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url): Item
159159
/**
160160
* @see https://www.w3.org/TR/microdata/#values
161161
*
162-
* @param DOMNode $node A DOMNode representing an element with the itemprop attribute.
163-
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
162+
* @param Node $node A Node representing an element with the itemprop attribute.
163+
* @param XPath $xpath An XPath object created from the node's document element.
164164
* @param string $url The URL the document was retrieved from, for relative URL resolution.
165165
*/
166-
private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url): Item|string
166+
private function getPropertyValue(Node $node, XPath $xpath, string $url): Item|string
167167
{
168168
/**
169169
* If the element also has an itemscope attribute: the value is the item created by the element.

src/Reader/RdfaLiteReader.php

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66

77
use Brick\StructuredData\Item;
88
use Brick\StructuredData\Reader;
9-
use DOMDocument;
10-
use DOMNode;
11-
use DOMXPath;
9+
use DOM\HTMLDocument;
10+
use DOM\Node;
11+
use DOM\XPath;
1212
use Override;
1313
use Sabre\Uri\InvalidUriException;
1414

@@ -93,9 +93,9 @@ final class RdfaLiteReader implements Reader
9393
];
9494

9595
#[Override]
96-
public function read(DOMDocument $document, string $url): array
96+
public function read(HTMLDocument $document, string $url): array
9797
{
98-
$xpath = new DOMXPath($document);
98+
$xpath = new XPath($document);
9999

100100
/**
101101
* Top-level item has a typeof attribute and no property attribute.
@@ -104,22 +104,22 @@ public function read(DOMDocument $document, string $url): array
104104
$nodes = iterator_to_array($nodes);
105105

106106
return array_map(
107-
fn (DOMNode $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null),
107+
fn (Node $node) => $this->nodeToItem($node, $xpath, $url, self::PREDEFINED_PREFIXES, null),
108108
$nodes,
109109
);
110110
}
111111

112112
/**
113-
* Extracts information from a DOMNode into an Item.
113+
* Extracts information from a Node into an Item.
114114
*
115-
* @param DOMNode $node A DOMNode representing an element with the typeof attribute.
116-
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
115+
* @param Node $node A Node representing an element with the typeof attribute.
116+
* @param XPath $xpath An XPath object created from the node's document element.
117117
* @param string $url The URL the document was retrieved from, for relative URL resolution.
118118
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
119119
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
120120
* This is the content of the vocab attribute of the closest item ancestor.
121121
*/
122-
private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item
122+
private function nodeToItem(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item
123123
{
124124
$vocabulary = $this->updateVocabulary($node, $vocabulary);
125125

@@ -165,7 +165,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $
165165

166166
// Exclude properties that are inside a nested item; XPath does not seem to provide a way to do this.
167167
// See: https://stackoverflow.com/q/26365495/759866
168-
$properties = array_filter($properties, function (DOMNode $itemprop) use ($node, $xpath) {
168+
$properties = array_filter($properties, function (Node $itemprop) use ($node, $xpath) {
169169
for (; ;) {
170170
$itemprop = $itemprop->parentNode;
171171

@@ -182,7 +182,7 @@ private function nodeToItem(DOMNode $node, DOMXPath $xpath, string $url, array $
182182
return false;
183183
});
184184

185-
/** @var DOMNode[] $properties */
185+
/** @var Node[] $properties */
186186
foreach ($properties as $property) {
187187
$names = $property->attributes->getNamedItem('property')->textContent;
188188

@@ -261,12 +261,12 @@ private function isValidAbsoluteURL(string $url): bool
261261
/**
262262
* Replaces the current vocabulary with the one from the vocab attribute of the current node, if set.
263263
*
264-
* @param DOMNode $node The DOMNode that may contain a vocab attribute.
264+
* @param Node $node The Node that may contain a vocab attribute.
265265
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
266266
*
267267
* @return string|null The updated vocabulary URL, if any.
268268
*/
269-
private function updateVocabulary(DOMNode $node, ?string $vocabulary): ?string
269+
private function updateVocabulary(Node $node, ?string $vocabulary): ?string
270270
{
271271
$vocab = $node->attributes->getNamedItem('vocab');
272272

@@ -310,13 +310,13 @@ private function checkVocabularyUrl(string $url): ?string
310310
/**
311311
* @see https://www.w3.org/TR/microdata/#values
312312
*
313-
* @param DOMNode $node A DOMNode representing an element with the property attribute.
314-
* @param DOMXPath $xpath A DOMXPath object created from the node's document element.
313+
* @param Node $node A Node representing an element with the property attribute.
314+
* @param XPath $xpath An XPath object created from the node's document element.
315315
* @param string $url The URL the document was retrieved from, for relative URL resolution.
316316
* @param string[] $prefixes The prefixes in use, as a map of prefix to vocabulary URL.
317317
* @param string|null $vocabulary The URL of the vocabulary in use, if any.
318318
*/
319-
private function getPropertyValue(DOMNode $node, DOMXPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string
319+
private function getPropertyValue(Node $node, XPath $xpath, string $url, array $prefixes, ?string $vocabulary): Item|string
320320
{
321321
// If the element also has a typeof attribute, create an item from the element
322322
$attr = $node->attributes->getNamedItem('typeof');

src/Reader/ReaderChain.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
namespace Brick\StructuredData\Reader;
66

77
use Brick\StructuredData\Reader;
8-
use DOMDocument;
8+
use DOM\HTMLDocument;
99
use Override;
1010

1111
use function array_merge;
@@ -29,7 +29,7 @@ public function __construct(Reader ...$readers)
2929
}
3030

3131
#[Override]
32-
public function read(DOMDocument $document, string $url): array
32+
public function read(HTMLDocument $document, string $url): array
3333
{
3434
if (! $this->readers) {
3535
return [];

0 commit comments

Comments
 (0)