- Install
- sudo gem install nokogiri
- Contribute
- github.com/tenderlove/nokogiri
An HTML, XML, SAX, & Reader parser with the ability to search documents via XPath or CSS3 selectors… and much more
Attributes
-
filename
R
-
lineno
R
-
state
RW
Public Instance Methods
-
action(&block)
Show Source
-
# Runs the block supplied by the caller and hands back whatever it evaluates
# to. The tokenizer rules in next_token wrap each token-building expression
# in a call to this method.
#
# Raises LocalJumpError when called without a block.
def action(&block)
  yield
end
-
load_file( filename )
Show Source
-
# Reads the whole of +filename+ and primes the scanner with its contents.
#
# filename - path of the file to tokenize; also stored in @filename.
#
# Returns whatever scan_setup returns.
# Raises the usual SystemCallError subclasses (e.g. Errno::ENOENT) when the
# file cannot be read.
def load_file( filename )
  @filename = filename
  # File.read is used instead of the bare Kernel#open: Kernel#open treats a
  # leading "|" in the name as a command to spawn, which is unsafe for
  # caller-supplied paths. File.read only ever touches the filesystem and
  # closes the handle itself.
  scan_setup(File.read(filename))
end
-
next_token()
Show Source
-
# Scans and returns the next token from the current scanner position.
#
# Returns nil once the scanner has reached the end of its input; otherwise
# returns the value produced by the matching rule's action, which is a
# two-element array of [token_type, matched_text]. For the catch-all rule the
# token type is the matched character itself.
#
# Rules are tried in order and the first regexp that matches wins, so more
# specific patterns (e.g. "~=") must stay ahead of their prefixes ("~").
#
# Raises ScanError when no rule matches or when @state holds a value this
# tokenizer has no rules for.
#
# NOTE: @lineno is only advanced when the character at the current position
# is a newline, so newlines consumed in the middle of a token are not
# counted.
def next_token
  return if @ss.eos?

  text = @ss.peek(1)
  @lineno += 1 if text == "\n"
  token = case @state
  when nil
    case
    when (text = @ss.scan(/has\([\s]*/))
      action { [:HAS, text] }

    # The "nonascii" class below is [^\0-\177] (octal), i.e. any character
    # outside the 7-bit ASCII range, as in the CSS lexical grammar.
    when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
      action { [:FUNCTION, text] }

    when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
      action { [:IDENT, text] }

    when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
      action { [:HASH, text] }

    when (text = @ss.scan(/[\s]*~=[\s]*/))
      action { [:INCLUDES, text] }

    when (text = @ss.scan(/[\s]*\|=[\s]*/))
      action { [:DASHMATCH, text] }

    when (text = @ss.scan(/[\s]*\^=[\s]*/))
      action { [:PREFIXMATCH, text] }

    when (text = @ss.scan(/[\s]*\$=[\s]*/))
      action { [:SUFFIXMATCH, text] }

    when (text = @ss.scan(/[\s]*\*=[\s]*/))
      action { [:SUBSTRINGMATCH, text] }

    when (text = @ss.scan(/[\s]*!=[\s]*/))
      action { [:NOT_EQUAL, text] }

    when (text = @ss.scan(/[\s]*=[\s]*/))
      action { [:EQUAL, text] }

    when (text = @ss.scan(/[\s]*\)/))
      action { [:RPAREN, text] }

    when (text = @ss.scan(/[\s]*\[[\s]*/))
      action { [:LSQUARE, text] }

    when (text = @ss.scan(/[\s]*\]/))
      action { [:RSQUARE, text] }

    when (text = @ss.scan(/[\s]*\+[\s]*/))
      action { [:PLUS, text] }

    when (text = @ss.scan(/[\s]*>[\s]*/))
      action { [:GREATER, text] }

    when (text = @ss.scan(/[\s]*,[\s]*/))
      action { [:COMMA, text] }

    when (text = @ss.scan(/[\s]*~[\s]*/))
      action { [:TILDE, text] }

    when (text = @ss.scan(/\:not\([\s]*/))
      action { [:NOT, text] }

    when (text = @ss.scan(/-?([0-9]+|[0-9]*\.[0-9]+)/))
      action { [:NUMBER, text] }

    when (text = @ss.scan(/[\s]*\/\/[\s]*/))
      action { [:DOUBLESLASH, text] }

    when (text = @ss.scan(/[\s]*\/[\s]*/))
      action { [:SLASH, text] }

    when (text = @ss.scan(/U\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?/))
      action {[:UNICODE_RANGE, text] }

    when (text = @ss.scan(/[\s]+/))
      action { [:S, text] }

    when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*'/))
      action { [:STRING, text] }

    # Catch-all: any single character (other than a newline) becomes a
    # token whose type is the character itself.
    when (text = @ss.scan(/./))
      action { [text, text] }

    else
      # Report everything from the current position to the end of the
      # input; the end index must be -1 to reach the last character.
      text = @ss.string[@ss.pos .. -1]
      raise ScanError, "can not match: '" + text + "'"
    end

  else
    raise ScanError, "undefined state: '" + state.to_s + "'"
  end
  token
end
-
scan_file( filename )
Show Source
-
# Tokenizes and parses the contents of a file.
#
# filename - path handed to load_file, which reads it and sets up the scanner.
#
# Returns the result of do_parse.
def scan_file( filename )
  load_file(filename)
  do_parse
end
-
scan_setup(str)
Show Source
-
# Resets the tokenizer so that it scans +str+ from the beginning.
#
# str - the text to tokenize; wrapped in a fresh StringScanner.
#
# Side effects: replaces @ss, resets @lineno to 1 and clears @state.
# Returns nil (the value of the final assignment).
def scan_setup(str)
  @ss = StringScanner.new(str)  # scanner that next_token reads from
  @lineno = 1                   # line counting starts at the first line
  @state = nil                  # nil is the only state next_token has rules for
end
-
scan_str(str)
Show Source
-
# Tokenizes and parses an in-memory string.
#
# str - the text to scan; passed to scan_setup to reset the scanner first.
#
# Returns the result of do_parse.
def scan_str(str)
  scan_setup(str)
  do_parse
end