1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
|
<?php namespace Gajus\Dindent;
/**
 * Re-indents HTML markup for readability.
 *
 * The indenter is a regex-driven tokenizer, not an HTML parser: it consumes the
 * input from left to right, classifies each token (tag, text, whitespace) and
 * emits one output line per token at the current indentation level.
 *
 * @link https://github.com/gajus/dindent for the canonical source repository
 * @license https://github.com/gajus/dindent/blob/master/LICENSE BSD 3-Clause
 */
class Indenter {
    const ELEMENT_TYPE_BLOCK = 0;
    const ELEMENT_TYPE_INLINE = 1;

    const MATCH_INDENT_NO = 0;
    const MATCH_INDENT_DECREASE = 1;
    const MATCH_INDENT_INCREASE = 2;
    const MATCH_DISCARD = 3;

    /**
     * @var array Tokenizer trace of the last indent() call (see getLog()).
     */
    private $log = array();

    /**
     * @var array Recognised options and their current values.
     */
    private $options = array(
        'indentation_character' => ' '
    );

    /**
     * @var array Names of elements that are kept on the line of their surrounding text.
     */
    private $inline_elements = array('b', 'big', 'i', 'small', 'tt', 'abbr', 'acronym', 'cite', 'code', 'dfn', 'em', 'kbd', 'strong', 'samp', 'var', 'a', 'bdo', 'br', 'img', 'span', 'sub', 'sup');

    /**
     * @var array Original <script> elements removed from the input of the current indent() call.
     */
    private $temporary_replacements_script = array();

    /**
     * @var array Original inline elements removed from the input of the current indent() call.
     */
    private $temporary_replacements_inline = array();

    /**
     * @param array $options Option name => value. Only keys already present in the default option set are accepted.
     * @throws Exception\InvalidArgumentException If an option name is not recognised.
     */
    public function __construct (array $options = array()) {
        foreach ($options as $name => $value) {
            if (!array_key_exists($name, $this->options)) {
                throw new Exception\InvalidArgumentException('Unrecognized option.');
            }

            $this->options[$name] = $value;
        }
    }

    /**
     * Declare whether an element is treated as a block or as an inline element.
     *
     * @param string $element_name Element name, e.g. "b".
     * @param int $type Either ELEMENT_TYPE_BLOCK or ELEMENT_TYPE_INLINE.
     * @return void
     * @throws Exception\InvalidArgumentException If $type is neither of the two constants.
     */
    public function setElementType ($element_name, $type) {
        if ($type === static::ELEMENT_TYPE_BLOCK) {
            $this->inline_elements = array_diff($this->inline_elements, array($element_name));
        } else if ($type === static::ELEMENT_TYPE_INLINE) {
            $this->inline_elements[] = $element_name;
        } else {
            throw new Exception\InvalidArgumentException('Unrecognized element type.');
        }

        $this->inline_elements = array_unique($this->inline_elements);
    }

    /**
     * @param string $input HTML input.
     * @return string Indented HTML.
     * @throws Exception\RuntimeException If the tokenizer could not consume the (normalised) input exactly.
     */
    public function indent ($input) {
        // Reset ALL per-call state. The replacement lists are only assigned when the
        // corresponding pattern matches, so without this reset a reused instance would
        // restore fragments captured from a previous document.
        $this->log = array();
        $this->temporary_replacements_script = array();
        $this->temporary_replacements_inline = array();

        // Dindent does not indent <script> body. Instead, it temporarily removes it from the code,
        // indents the input, and restores the script body.
        if (preg_match_all('/<script\b[^>]*>([\s\S]*?)<\/script>/mi', $input, $matches)) {
            $this->temporary_replacements_script = $matches[0];

            foreach ($matches[0] as $i => $match) {
                $input = str_replace($match, '<script>' . ($i + 1) . '</script>', $input);
            }
        }

        // Removing double whitespaces to make the source code easier to read.
        // With exception of <pre>/ CSS white-space changing the default behaviour, double whitespace
        // is meaningless in HTML output.
        // This reason alone is sufficient not to use Dindent in production.
        $input = str_replace("\t", '', $input);
        $input = preg_replace('/\s{2,}/', ' ', $input);

        // Remove inline elements and replace them with text entities.
        // Skipped when no inline elements are configured: an empty alternation would match "<>...</>".
        if (count($this->inline_elements) > 0) {
            // Element names may come from setElementType(); quote them so they are matched literally.
            $quoted_names = array_map(function ($name) {
                return preg_quote($name, '/');
            }, $this->inline_elements);

            // The lookahead forces the whole tag name to match. Without it "<br>c</b>" is taken
            // for a <b> element ("b" followed by attribute junk "r"), which swallows the real </b>.
            $inline_pattern = '/<(' . implode('|', $quoted_names) . ')(?=[\s>\/])[^>]*>(?:[^<]*)<\/\1>/';

            if (preg_match_all($inline_pattern, $input, $matches)) {
                $this->temporary_replacements_inline = $matches[0];

                foreach ($matches[0] as $i => $match) {
                    $input = str_replace($match, 'ᐃ' . ($i + 1) . 'ᐃ', $input);
                }
            }
        }

        // Token patterns, tried in order; the first one that matches wins.
        // The table does not depend on the loop state, so it is built once.
        $patterns = array(
            // block tag (whole tag name must match the closing tag, see the lookahead)
            '/^(<([a-z]+)(?=[\s>\/])(?:[^>]*)>(?:[^<]*)<\/(?:\2)>)/' => static::MATCH_INDENT_NO,
            // DOCTYPE
            '/^<!([^>]*)>/' => static::MATCH_INDENT_NO,
            // tag with implied closing
            '/^<(input|link|meta|base|br|img|source|hr)([^>]*)>/' => static::MATCH_INDENT_NO,
            // self closing SVG tags
            '/^<(animate|stop|path|circle|line|polyline|rect|use)([^>]*)\/>/' => static::MATCH_INDENT_NO,
            // self-closing tag; must precede the opening tag rule, which would otherwise
            // always match first and increase the indentation without a matching decrease
            '/^<[^\/]([^>]*)\/>/' => static::MATCH_INDENT_NO,
            // opening tag
            '/^<[^\/]([^>]*)>/' => static::MATCH_INDENT_INCREASE,
            // closing tag
            '/^<\/([^>]*)>/' => static::MATCH_INDENT_DECREASE,
            // whitespace
            '/^(\s+)/' => static::MATCH_DISCARD,
            // text node
            '/([^<]+)/' => static::MATCH_INDENT_NO
        );

        // Human readable rule names for the log, indexed by the MATCH_* constants.
        $rules = array('NO', 'DECREASE', 'INCREASE', 'DISCARD');

        $subject = $input;
        $output = '';
        $next_line_indentation_level = 0;

        do {
            $matched = false;
            $indentation_level = $next_line_indentation_level;

            foreach ($patterns as $pattern => $rule) {
                if (!preg_match($pattern, $subject, $matches)) {
                    continue;
                }

                $matched = true;

                $this->log[] = array(
                    'rule' => $rules[$rule],
                    'pattern' => $pattern,
                    'subject' => $subject,
                    'match' => $matches[0]
                );

                // Consume the token.
                $subject = mb_substr($subject, mb_strlen($matches[0]));

                // Whitespace between tokens produces no output line.
                if ($rule === static::MATCH_DISCARD) {
                    break;
                }

                if ($rule === static::MATCH_INDENT_DECREASE) {
                    // A closing tag is printed one level up and lowers the level for what follows.
                    $next_line_indentation_level--;
                    $indentation_level--;
                } else if ($rule === static::MATCH_INDENT_INCREASE) {
                    // An opening tag is printed at the current level; its content goes one level deeper.
                    $next_line_indentation_level++;
                }

                // Unbalanced closing tags must not produce a negative repeat count.
                if ($indentation_level < 0) {
                    $indentation_level = 0;
                }

                $output .= str_repeat($this->options['indentation_character'], $indentation_level) . $matches[0] . "\n";

                break;
            }
        } while ($matched);

        // Sanity check: the concatenated tokens must reproduce the normalised input exactly.
        $interpreted_input = '';

        foreach ($this->log as $e) {
            $interpreted_input .= $e['match'];
        }

        if ($interpreted_input !== $input) {
            throw new Exception\RuntimeException('Did not reproduce the exact input.');
        }

        // Put the closing tag of an empty element back on the line of its opening tag.
        // The lookahead keeps "<br>" followed by "</b>" from being mistaken for an empty <b>.
        $output = preg_replace('/(<(\w+)(?=[\s>\/])[^>]*>)\s*(<\/\2>)/', '\\1\\3', $output);

        foreach ($this->temporary_replacements_script as $i => $original) {
            $output = str_replace('<script>' . ($i + 1) . '</script>', $original, $output);
        }

        foreach ($this->temporary_replacements_inline as $i => $original) {
            $output = str_replace('ᐃ' . ($i + 1) . 'ᐃ', $original, $output);
        }

        return trim($output);
    }

    /**
     * Debugging utility. Get log for the last indent operation.
     *
     * Each entry holds the keys "rule", "pattern", "subject" and "match".
     *
     * @return array
     */
    public function getLog () {
        return $this->log;
    }
}
|