zz log

zaininnari Blog

openpear/PEG パーサコンビネータを使ったCSSパーサ&バリデータ

PEGパーサコンビネータ

前回までは、「uupaa-js」( http://code.google.com/p/uupaa-js/ )の正規表現を使ったパーサでしたが、PEGパーサコンビネータを使ったパーサに逆戻りしました。

CSSパーサ&バリデータ

http://github.com/zaininnari/CssParser

  • 内部については、 HatenaSyntax ( http://openpear.org/package/HatenaSyntax )を参考にしました。ただし、ここまで綺麗ではありません。
  • パーサとバリデータから成り立っています。
    • パーサは、CSSの文字列をそれっぽく分解します。(ほぼ完成)
      • @規則(charset、import、media、font-face、page)を認識します。
      • 解釈できないものは、unknownで返されます。
      • コメントは、無視します。
      • 省略した記述「p {color:red」(「;」の省略/「}」の省略)を認識します。
    • バリデータは、パースしたものをチェックします。(未完成)
      • CSS2.1と比べて仕様の量の少ないソフトバンク向けCSSが入っています。(テストは不十分です)
      • CSS2.1は、セレクタのみチェックできます。OKなら、isValidがtrueになります。
      • 宣言(color:red)も少しだけ、チェックできます。OKなら、isValidがtrueになります。
      • @規則は手つかずです。

サンプル

@charset "UTF-8";
@import url("style.css") screen , print;
@font-face {
	src: local("Myfont"),url(Myfont.ttf);
}
@page :left {
	margin-left: 4cm;
}
@media screen {
	p {
		font-size: 16px;
	}
}
* #id.class>:link+:lang(ja) ,
div:first-line[attr]:before:after
{
	font-size: 16px;
	xxxxx-xxxx;       /* 宣言の形を成していないもの */
}                 

パースのみ

<?php
var_dump(CssParser::parse($input));


object(CssParser_Node)[973]
protected 'type' => string 'root' (length=4)
protected 'offset' => int 0
protected 'data' =>
array
0 =>
object(CssParser_Node)[950]
protected 'type' => string 'atRule' (length=6)
protected 'offset' => int 0
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[948]
protected 'type' => string '@charset' (length=8)
protected 'offset' => int 0
protected 'data' => string '@charset' (length=8)
'value' =>
object(CssParser_Node)[949]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 10
protected 'data' => string 'UTF-8' (length=5)
1 =>
object(CssParser_Node)[954]
protected 'type' => string 'atRule' (length=6)
protected 'offset' => int 18
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[951]
protected 'type' => string '@import' (length=7)
protected 'offset' => int 18
protected 'data' => string '@import' (length=7)
'value' =>
object(CssParser_Node)[952]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 26
protected 'data' => string 'url("style.css")' (length=16)
'mediaType' =>
object(CssParser_Node)[953]
protected 'type' => string 'mediaType' (length=9)
protected 'offset' => int 43
protected 'data' => string 'screen , print' (length=14)
2 =>
object(CssParser_Node)[958]
protected 'type' => string 'atRule' (length=6)
protected 'offset' => int 59
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[955]
protected 'type' => string '@font-face' (length=10)
protected 'offset' => int 59
protected 'data' => string '@font-face' (length=10)
'block' =>
array
0 =>
array
'property' =>
object(CssParser_Node)[956]
protected 'type' => string 'property' (length=8)
protected 'offset' => int 73
protected 'data' => string 'src' (length=3)
'value' =>
object(CssParser_Node)[957]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 78
protected 'data' => string 'local("Myfont"),url(Myfont.ttf)' (length=31)
'isImportant' => boolean false
3 =>
object(CssParser_Node)[962]
protected 'type' => string 'atRule' (length=6)
protected 'offset' => int 113
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[959]
protected 'type' => string '@page' (length=5)
protected 'offset' => int 113
protected 'data' => string '@page :left' (length=11)
'block' =>
array
0 =>
array
'property' =>
object(CssParser_Node)[960]
protected 'type' => string 'property' (length=8)
protected 'offset' => int 128
protected 'data' => string 'margin-left' (length=11)
'value' =>
object(CssParser_Node)[961]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 141
protected 'data' => string '4cm' (length=3)
'isImportant' => boolean false
4 =>
object(CssParser_Node)[970]
protected 'type' => string 'atRule' (length=6)
protected 'offset' => int 148
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[963]
protected 'type' => string '@media' (length=6)
protected 'offset' => int 148
protected 'data' => string '@media screen' (length=13)
'value' =>
array
0 =>
array
'selector' =>
object(CssParser_Node)[967]
protected 'type' => string 'selector' (length=8)
protected 'offset' => int 165
protected 'data' => string 'p' (length=1)
'block' =>
array
0 =>
array
'property' =>
object(CssParser_Node)[968]
protected 'type' => string 'property' (length=8)
protected 'offset' => int 171
protected 'data' => string 'font-size' (length=9)
'value' =>
object(CssParser_Node)[969]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 182
protected 'data' => string '16px' (length=4)
'isImportant' => boolean false
5 =>
object(CssParser_Node)[972]
protected 'type' => string 'ruleSet' (length=7)
protected 'offset' => int 193
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[964]
protected 'type' => string 'selector' (length=8)
protected 'offset' => int 193
protected 'data' => string '* #id.class>:link+:lang(ja) , div:first-line[attr]:before:after' (length=63)
'block' =>
array
0 =>
array
'property' =>
object(CssParser_Node)[965]
protected 'type' => string 'property' (length=8)
protected 'offset' => int 260
protected 'data' => string 'font-size' (length=9)
'value' =>
object(CssParser_Node)[966]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 271
protected 'data' => string '16px' (length=4)
'isImportant' => boolean false
1 =>
object(CssParser_Node)[971]
protected 'type' => string 'unknown' (length=7)
protected 'offset' => int 278
protected 'data' => string 'xxxxx-xxxx; /* 宣言の形を成していないもの */ ' (length=64)

バリデータ(セレクタ部分のみ)

<?php
var_dump(CssParser::validate($input));


object(CssParser_Node)[955]
protected 'type' => string 'root' (length=4)
protected 'offset' => int 0
protected 'data' =>
array
0 =>
object(CssParser_Node)[954]
protected 'type' => string 'ruleSet' (length=7)
protected 'offset' => int 0
protected 'data' =>
array
'selector' =>
object(CssParser_Node)[946]
protected 'type' => string 'selector' (length=8)
protected 'offset' => int 0
protected 'data' =>
array
0 =>
array
'selector' => string '* #id.class>:link+:lang(ja)' (length=27)
'cleanSelector' => string '* #id.class>:link+:lang(ja)' (length=27)
'parsedSelector' =>
array
0 =>
array
0 => string 'universal' (length=9)
1 => string '*' (length=1)
1 =>
array
0 => string 'descendant' (length=10)
1 => string ' ' (length=1)
2 =>
array
0 => string 'id' (length=2)
1 => string '#id' (length=3)
3 =>
array
0 => string 'class' (length=5)
1 => string '.class' (length=6)
4 =>
array
0 => string 'child' (length=5)
1 => string '>' (length=1)
5 =>
array
0 => string 'link' (length=4)
1 => string ':link' (length=5)
6 =>
array
0 => string 'adjacent' (length=8)
1 => string '+' (length=1)
7 =>
array
0 => string 'language' (length=8)
1 => string ':lang(ja)' (length=9)
'error' =>
array
empty
'isValid' => boolean true

1 =>
array
'selector' => string 'div:first-line[attr]:before:after' (length=33)
'cleanSelector' => string 'div:first-line[attr]:before:after' (length=33)
'parsedSelector' =>
array
0 =>
array
0 => string 'type' (length=4)
1 => string 'div' (length=3)
1 =>
array
0 => string 'first-line' (length=10)
1 => string ':first-line' (length=11)
2 =>
array
0 => string 'attribute' (length=9)
1 => string '[attr]' (length=6)
3 =>
array
0 => string 'before' (length=6)
1 => string ':before' (length=7)
4 =>
array
0 => string 'after' (length=5)
1 => string ':after' (length=6)
'error' =>
array
empty
'isValid' => boolean true
'block' =>
array
0 =>
array
'property' =>
object(CssParser_Node)[949]
protected 'type' => string 'property' (length=8)
protected 'offset' => int 67
protected 'data' => string 'color' (length=5)
'value' =>
object(CssParser_Node)[950]
protected 'type' => string 'value' (length=5)
protected 'offset' => int 74
protected 'data' => string 'red' (length=3)
'isImportant' => boolean false
'isValid' => boolean true
1 =>
object(CssParser_Node)[951]
protected 'type' => string 'unknown' (length=7)
protected 'offset' => int 80
protected 'data' => string 'xxxxx-xxxx; /* 宣言の形を成していないもの */ ' (length=64)