Class Nokogiri::HTML::Document inherits from Nokogiri::XML::Document
Public Class Methods
- new(...) Show Source
Create a new document
-
/*
 * Create a new document.
 *
 * Accepts any number of arguments. The first one, when truthy, is converted
 * to a C string and handed to htmlNewDoc as its first argument (uri); the
 * second one, when truthy, becomes htmlNewDoc's second argument
 * (external_id). Falsy or missing values are passed as NULL.
 *
 * The resulting document is wrapped for +klass+, and the original argc/argv
 * are forwarded unchanged to the Ruby-level initializer via rb_obj_call_init.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE args, uri, external_id, wrapped;
  const xmlChar *c_uri = NULL;
  const xmlChar *c_external_id = NULL;
  htmlDocPtr html_doc;

  /* "0*": no mandatory arguments, everything is collected into +args+. */
  rb_scan_args(argc, argv, "0*", &args);

  uri         = rb_ary_entry(args, (long)0);
  external_id = rb_ary_entry(args, (long)1);

  if (RTEST(uri)) {
    c_uri = (const xmlChar *)StringValuePtr(uri);
  }
  if (RTEST(external_id)) {
    c_external_id = (const xmlChar *)StringValuePtr(external_id);
  }

  html_doc = htmlNewDoc(c_uri, c_external_id);
  wrapped  = Nokogiri_wrap_xml_document(klass, html_doc);

  rb_obj_call_init(wrapped, argc, argv);

  return wrapped;
}
- parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block) Show Source
Parse HTML. string_or_io may be a String, or any object that responds to read and close, such as an IO or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
-
# File lib/nokogiri/html/document.rb, line 64
###
# Parse HTML from +string_or_io+.
#
# string_or_io:: a String, or an object responding to +read+ (handled via
#                read_io); nil or an empty string yields a blank document
# url::          location of the document; when nil and the input responds
#                to +path+, that path is used (IO branch only)
# encoding::     encoding name; when nil and the input reports an encoding
#                other than "ASCII-8BIT", that encoding's name is used
# options::      an integer bitmask or an options object responding to +to_i+;
#                integers are wrapped in Nokogiri::XML::ParseOptions
#
# If a block is given, it is yielded the options object before parsing so
# the caller can adjust it.
def parse string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block

  # Integer (rather than Fixnum) so the check works on Rubies where the
  # Fixnum constant is deprecated or removed, and also wraps large values.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:encoding)
    unless string_or_io.encoding.name == "ASCII-8BIT"
      encoding ||= string_or_io.encoding.name
    end
  end

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? or string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end
- read_io(p1, p2, p3, p4) Show Source
Read the HTML document from io with given url, encoding, and options. See Nokogiri::HTML.parse
-
/*
 * Read the HTML document from +io+ with the given +url+, +encoding+ and
 * +options+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadIO. Structured parser errors raised while reading are pushed onto
 * a fresh Ruby array, which is stored on the returned document as @errors.
 *
 * Raises the wrapped libxml2 syntax error when parsing yields no document
 * and libxml2 reports a last error; otherwise raises RuntimeError
 * ("Could not parse document").
 */
static VALUE read_io(VALUE klass, VALUE io, VALUE url, VALUE encoding, VALUE options)
{
  const char *url_cstr = NULL;
  const char *enc_cstr = NULL;
  VALUE errors;
  VALUE rb_document;
  htmlDocPtr parsed;
  xmlErrorPtr last_error;

  if (!NIL_P(url)) {
    url_cstr = StringValuePtr(url);
  }
  if (!NIL_P(encoding)) {
    enc_cstr = StringValuePtr(encoding);
  }
  errors = rb_ary_new();

  /* Collect structured errors into +errors+ only for the duration of the parse. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

  parsed = htmlReadIO(
      io_read_callback,
      io_close_callback,
      (void *)io,
      url_cstr,
      enc_cstr,
      (int)NUM2INT(options)
  );

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (parsed != NULL) {
    rb_document = Nokogiri_wrap_xml_document(klass, parsed);
    rb_iv_set(rb_document, "@errors", errors);
    return rb_document;
  }

  /* Failure path: +parsed+ is NULL here, so there is no document to free. */
  xmlFreeDoc(parsed);

  last_error = xmlGetLastError();
  if (last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Could not parse document");
  } else {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  }

  return Qnil;
}
- read_memory(p1, p2, p3, p4) Show Source
Read the HTML document contained in string with given url, encoding, and options. See Nokogiri::HTML.parse
-
/*
 * Read the HTML document contained in +string+ with the given +url+,
 * +encoding+ and +options+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadMemory. Structured parser errors raised while reading are pushed
 * onto a fresh Ruby array, which is stored on the returned document as
 * @errors.
 *
 * Raises the wrapped libxml2 syntax error when parsing yields no document
 * and libxml2 reports a last error; otherwise raises RuntimeError
 * ("Could not parse document").
 */
static VALUE read_memory(VALUE klass, VALUE string, VALUE url, VALUE encoding, VALUE options)
{
  const char *buffer = StringValuePtr(string);
  const char *url_cstr = NULL;
  const char *enc_cstr = NULL;
  int buffer_len;
  VALUE errors;
  VALUE rb_document;
  htmlDocPtr parsed;
  xmlErrorPtr last_error;

  if (!NIL_P(url)) {
    url_cstr = StringValuePtr(url);
  }
  if (!NIL_P(encoding)) {
    enc_cstr = StringValuePtr(encoding);
  }

  /* NOTE(review): the string length is narrowed to int here; inputs longer
   * than an int can represent would presumably be truncated -- confirm. */
  buffer_len = (int)RSTRING_LEN(string);
  errors = rb_ary_new();

  /* Collect structured errors into +errors+ only for the duration of the parse. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

  parsed = htmlReadMemory(buffer, buffer_len, url_cstr, enc_cstr, (int)NUM2INT(options));

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (parsed != NULL) {
    rb_document = Nokogiri_wrap_xml_document(klass, parsed);
    rb_iv_set(rb_document, "@errors", errors);
    return rb_document;
  }

  /* Failure path: +parsed+ is NULL here, so there is no document to free. */
  xmlFreeDoc(parsed);

  last_error = xmlGetLastError();
  if (last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Could not parse document");
  } else {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  }

  return Qnil;
}
Public Instance Methods
- fragment(tags = nil) Show Source
Create a Nokogiri::XML::DocumentFragment from tags
-
# File lib/nokogiri/html/document.rb, line 51
###
# Build a DocumentFragment for this document from +tags+ (nil by default).
# The document itself and its root node are passed along to
# DocumentFragment.new as the first and third arguments.
def fragment(tags = nil)
  context_node = root
  DocumentFragment.new(self, tags, context_node)
end
- meta_encoding() Show Source
Get the meta tag encoding for this document. If there is no meta tag, then nil is returned
-
# File lib/nokogiri/html/document.rb, line 7
###
# Get the meta tag encoding for this document.
#
# Looks for the first <meta> element whose http-equiv attribute matches
# Content-Type and extracts the charset from its content attribute.
#
# Returns the charset name as a String, or nil when there is no such meta
# tag, the tag has no content attribute, or the content carries no
# "charset=" part.
def meta_encoding
  return nil unless meta = css('meta').find { |node|
    node['http-equiv'] =~ /Content-Type/
  }

  # A Content-Type meta tag is not required to declare a charset (e.g.
  # content="text/html"), so guard against a missing attribute or a failed
  # match instead of indexing into nil.
  content = meta['content']
  return nil unless content

  match = /charset\s*=\s*([\w-]+)/.match(content)
  match ? match[1] : nil
end
- meta_encoding=(encoding) Show Source
Set the meta tag encoding for this document. If there is no meta content tag, nil is returned and the encoding is not set.
-
# File lib/nokogiri/html/document.rb, line 18
###
# Set the meta tag encoding for this document.
#
# Finds the first <meta> element whose http-equiv attribute matches
# Content-Type and rewrites its content attribute to
# "text/html; charset=<encoding>". When no such element exists, nothing is
# changed and the method body evaluates to nil; otherwise it evaluates to
# +encoding+.
def meta_encoding=(encoding)
  content_type_meta = css('meta').find { |node| node['http-equiv'] =~ /Content-Type/ }
  return nil unless content_type_meta

  content_type_meta['content'] = "text/html; charset=%s" % encoding
  encoding
end
- serialize(options = {}) Show Source
Serialize Node using options. Save options can also be set using a block. See SaveOptions.
These two statements are equivalent:
node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
or
node.serialize(:encoding => 'UTF-8') do |config| config.format.as_xml end
-
# File lib/nokogiri/html/document.rb, line 41
###
# Serialize this document using +options+.
#
# When +options+ has no truthy :save_with entry, one is stored into the
# given hash: FORMAT | AS_HTML | NO_DECLARATION | NO_EMPTY_TAGS. The call is
# then delegated to the superclass implementation with the same arguments
# and block.
def serialize(options = {}, &block)
  unless options[:save_with]
    options[:save_with] = XML::Node::SaveOptions::FORMAT |
                          XML::Node::SaveOptions::AS_HTML |
                          XML::Node::SaveOptions::NO_DECLARATION |
                          XML::Node::SaveOptions::NO_EMPTY_TAGS
  end

  # Bare super forwards +options+ and the block unchanged.
  super
end
- type() Show Source
The type for this document
-
/*
 * The type for this document.
 *
 * Unwraps the native document held by +self+ and returns its +type+ field
 * converted to a Ruby integer.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr html_doc;
  long doc_type;

  Data_Get_Struct(self, xmlDoc, html_doc);
  doc_type = (long)html_doc->type;

  return INT2NUM(doc_type);
}