Class Nokogiri::XML::Document inherits from Nokogiri::XML::Node
Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document is created by parsing an XML document. See Nokogiri.XML()
For searching a Document, see Nokogiri::XML::Node#css and Nokogiri::XML::Node#xpath
Constants
- NCNAME_CHAR
- NCNAME_RE
- NCNAME_START_CHAR
I’m ignoring unicode characters here. See www.w3.org/TR/REC-xml-names/#ns-decl for more details.
Attributes
- errors RW
A list of Nokogiri::XML::SyntaxError found when parsing a document
Public Class Methods
- new(*args) Show Source
-
Create a new document with version (defaults to “1.0”)
-
/*
 * Create a new, empty document and wrap it as an instance of klass.
 *
 * The first optional argument is the XML version string; "1.0" is used when
 * it is missing or nil.  All arguments are forwarded to #initialize.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE extra_args;
  VALUE xml_version;
  VALUE wrapped;
  xmlDocPtr c_doc;

  rb_scan_args(argc, argv, "0*", &extra_args);

  xml_version = rb_ary_entry(extra_args, (long)0);
  if (NIL_P(xml_version)) {
    xml_version = rb_str_new2("1.0");
  }

  c_doc = xmlNewDoc((xmlChar *)StringValuePtr(xml_version));
  wrapped = Nokogiri_wrap_xml_document(klass, c_doc);

  /* Run the Ruby-level initializer with the caller's original arguments. */
  rb_obj_call_init(wrapped, argc, argv);

  return wrapped;
}
- parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block) Show Source
-
Parse an XML file. string_or_io may be a String, or any object that responds to read and close such as an IO, or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
-
# File lib/nokogiri/xml/document.rb, line 25
#
# Parse +string_or_io+ and return the resulting document.
#
# string_or_io:: a String, or an object responding to +read+ (IO, StringIO, ...)
# url::          location of the document; defaults to string_or_io.path when
#                the source responds to +path+
# encoding::     encoding name handed to the parser
# options::      an integer bit mask or a ParseOptions object
#
# When a block is given it is yielded the ParseOptions object before parsing.
# A nil or empty string yields a fresh, empty document.
def self.parse string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block
  # Integer rather than Fixnum: Fixnum is deprecated since Ruby 2.4 and gone in
  # 3.2, and it never matched Bignum-sized option masks.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options

  # Give the options to the user
  yield options if block_given?

  doc =
    if string_or_io.respond_to?(:read)
      url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
      read_io(string_or_io, url, encoding, options.to_i)
    else
      # read_memory pukes on empty docs
      return new if string_or_io.nil? or string_or_io.empty?
      read_memory(string_or_io, url, encoding, options.to_i)
    end

  # do xinclude processing
  doc.do_xinclude(options) if options.xinclude?

  return doc
end
- read_io(p1, p2, p3, p4) Show Source
-
Create a new document from an IO object
-
/*
 * Create a new document from an IO object.
 *
 * io is read through io_read_callback / io_close_callback.  url and encoding
 * may be nil, in which case NULL is handed to libxml2.  Errors reported while
 * parsing are collected in an array stored in the document's @errors.
 * Raises when libxml2 returns no document at all.
 */
static VALUE read_io(VALUE klass, VALUE io, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_url = NULL;
  const char *c_enc = NULL;
  VALUE parse_errors;
  VALUE rb_doc;
  xmlDocPtr c_doc;
  xmlErrorPtr last_error;

  if (!NIL_P(url)) {
    c_url = StringValuePtr(url);
  }
  if (!NIL_P(encoding)) {
    c_enc = StringValuePtr(encoding);
  }
  parse_errors = rb_ary_new();

  /* Route structured errors into parse_errors for the duration of the parse. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)parse_errors, Nokogiri_error_array_pusher);

  c_doc = xmlReadIO(
            (xmlInputReadCallback)io_read_callback,
            (xmlInputCloseCallback)io_close_callback,
            (void *)io,
            c_url,
            c_enc,
            (int)NUM2INT(options)
          );

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (c_doc != NULL) {
    rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
    rb_iv_set(rb_doc, "@errors", parse_errors);
    return rb_doc;
  }

  /* No document was produced; c_doc is NULL on this path. */
  xmlFreeDoc(c_doc);

  last_error = xmlGetLastError();
  if (last_error) {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  } else {
    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  return Qnil;
}
- read_memory(p1, p2, p3, p4) Show Source
-
Create a new document from a String
-
/*
 * Create a new document from a String.
 *
 * The string's bytes are handed to xmlReadMemory.  url and encoding may be
 * nil, in which case NULL is passed to libxml2.  Errors reported while parsing
 * are collected in an array stored in the document's @errors.
 * Raises when libxml2 returns no document at all.
 */
static VALUE read_memory(VALUE klass, VALUE string, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_buffer = StringValuePtr(string);
  const char *c_url = NULL;
  const char *c_enc = NULL;
  int buffer_len;
  VALUE parse_errors;
  VALUE rb_doc;
  xmlDocPtr c_doc;
  xmlErrorPtr last_error;

  if (!NIL_P(url)) {
    c_url = StringValuePtr(url);
  }
  if (!NIL_P(encoding)) {
    c_enc = StringValuePtr(encoding);
  }
  buffer_len = (int)RSTRING_LEN(string);
  parse_errors = rb_ary_new();

  /* Route structured errors into parse_errors for the duration of the parse. */
  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)parse_errors, Nokogiri_error_array_pusher);

  c_doc = xmlReadMemory(c_buffer, buffer_len, c_url, c_enc, (int)NUM2INT(options));

  xmlSetStructuredErrorFunc(NULL, NULL);

  if (c_doc != NULL) {
    rb_doc = Nokogiri_wrap_xml_document(klass, c_doc);
    rb_iv_set(rb_doc, "@errors", parse_errors);
    return rb_doc;
  }

  /* No document was produced; c_doc is NULL on this path. */
  xmlFreeDoc(c_doc);

  last_error = xmlGetLastError();
  if (last_error) {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  } else {
    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  return Qnil;
}
- wrap(document) Show Source
-
JRuby only. Wraps Java’s org.w3c.dom.Document and returns a Nokogiri::XML::Document.
-
# File lib/nokogiri/xml/document.rb, line 226
#
# Wrap +document+ via wrapJavaDocument and return the result.
# Raises unless Nokogiri.jruby? is true.
def self.wrap document
  unless Nokogiri.jruby?
    raise "JRuby only method"
  end

  wrapJavaDocument(document)
end
Public Instance Methods
- add_child(node_or_tags) Show Source
-
# File lib/nokogiri/xml/document.rb, line 211
#
# Add +node_or_tags+ as the root of this document.
# Raises if the document already has a root, or if the coerced argument is a
# NodeSet holding more than one node.
def add_child node_or_tags
  if root
    raise "Document already has a root node"
  end

  node_or_tags = coerce(node_or_tags)

  # A bare +super+ forwards the current (coerced) value of node_or_tags.
  return super unless node_or_tags.is_a?(XML::NodeSet)

  if node_or_tags.size > 1
    raise "Document cannot have multiple root nodes"
  end

  super(node_or_tags.first)
end
- canonicalize(p1 = v1, p2 = v2, p3 = v3) Show Source
-
Canonicalize a document and return the results. The optional arguments are, in order, the canonicalization mode, an array of inclusive namespace prefixes, and a flag requesting that comments be kept. Takes an optional block that takes two parameters: the obj and that node’s parent. The obj will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace. The block must return a non-nil, non-false value if the obj passed in should be included in the canonicalized document.
-
/*
 * Canonicalize the document and return the result as a String.
 *
 * Optional arguments, in order:
 *   mode          - integer passed to xmlC14NExecute as the mode (0 when nil)
 *   incl_ns       - array of strings passed as the inclusive namespace
 *                   prefix list (NULL when nil)
 *   with_comments - truthy to request comments in the output
 *
 * When a block is given it is handed to libxml2 (through block_caller) as the
 * visibility callback.  Output is written to a StringIO whose contents are
 * returned.
 */
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
{
  VALUE mode;
  VALUE incl_ns;
  VALUE with_comments;
  xmlChar **ns = NULL;
  long ns_len, i;
  xmlDocPtr doc;
  xmlOutputBufferPtr buf;
  xmlC14NIsVisibleCallback cb = NULL;
  void * ctx = NULL;
  VALUE rb_cStringIO;
  VALUE io;

  rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);

  Data_Get_Struct(self, xmlDoc, doc);

  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
  io = rb_class_new_instance(0, 0, rb_cStringIO);

  buf = xmlAllocOutputBuffer(NULL);
  if (buf == NULL) {
    rb_raise(rb_eRuntimeError, "Could not allocate output buffer");
  }

  buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
  buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
  buf->context       = (void *)io;

  if (rb_block_given_p()) {
    cb = block_caller;
    ctx = (void *)rb_block_proc();
  }

  if (!NIL_P(incl_ns)) {
    ns_len = RARRAY_LEN(incl_ns);
    /* One extra zeroed slot so the list is NULL-terminated. */
    ns = calloc((size_t)ns_len + 1, sizeof(xmlChar *));
    if (ns == NULL) {
      xmlOutputBufferClose(buf);
      rb_raise(rb_eRuntimeError, "Could not allocate namespace prefix list");
    }
    for (i = 0 ; i < ns_len ; i++) {
      VALUE entry = rb_ary_entry(incl_ns, i);
      /* Borrowed pointer into the Ruby string; only the array itself is ours. */
      ns[i] = (xmlChar *)StringValuePtr(entry);
    }
  }

  /* RTEST so that an explicit false (not only nil) disables comments. */
  xmlC14NExecute(doc, cb, ctx,
                 (int)(NIL_P(mode) ? 0 : NUM2INT(mode)),
                 ns,
                 RTEST(with_comments) ? 1 : 0,
                 buf);

  /* The prefix array was previously leaked on every call. */
  free(ns);

  xmlOutputBufferClose(buf);

  return rb_funcall(io, rb_intern("string"), 0);
}
- collect_namespaces() Show Source
-
Recursively get all namespaces from this node and its subtree and return them as a hash.
For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:hello="world" /> </root>
This method will return:
{ 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
WARNING: this method will clobber duplicate names in the keys. For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:foo="baz" /> </root>
The hash returned will look like this: { ‘xmlns:foo’ => ‘bar’ }
Non-prefixed default namespaces (as in “xmlns=”) are not included in the hash.
Note that this is a very expensive operation in the current implementation, as it traverses the entire graph and also has to bring each node across the libxml bridge into a Ruby object.
-
# File lib/nokogiri/xml/document.rb, line 137
#
# Walk every node via +traverse+ and merge each node's +namespaces+ hash into
# one result hash, which is returned.  Later merges overwrite earlier keys.
def collect_namespaces
  collected = {}
  traverse do |node|
    collected.merge!(node.namespaces)
  end
  collected
end
- create_cdata(string, &block) Show Source
-
# File lib/nokogiri/xml/document.rb, line 89
#
# Build a Nokogiri::XML::CDATA node owned by this document from
# +string+ (converted with to_s).  The block is passed on to CDATA.new.
def create_cdata string, &block
  text = string.to_s
  Nokogiri::XML::CDATA.new(self, text, &block)
end
- create_comment(string, &block) Show Source
-
# File lib/nokogiri/xml/document.rb, line 94
#
# Build a Nokogiri::XML::Comment node owned by this document from
# +string+ (converted with to_s).  The block is passed on to Comment.new.
def create_comment string, &block
  text = string.to_s
  Nokogiri::XML::Comment.new(self, text, &block)
end
- create_element(name, *args, &block) Show Source
-
Create an element with name, and optionally setting the content and attributes.
doc.create_element "div" # <div></div> doc.create_element "div", :class => "container" # <div class='container'></div> doc.create_element "div", "contents" # <div>contents</div> doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div> doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div> -
# File lib/nokogiri/xml/document.rb, line 62
#
# Create an element called +name+ in this document.
#
# Each extra argument is either a Hash of attributes or the element content.
# Hash keys matching NCNAME_RE are added as namespace definitions, using the
# text after the first ":" as the prefix; all other keys become attributes
# with the value converted by to_s.  The block is passed to Element.new.
def create_element name, *args, &block
  element = Nokogiri::XML::Element.new(name, self, &block)

  args.each do |arg|
    unless Hash === arg
      element.content = arg
      next
    end

    arg.each do |attr_name, attr_value|
      key = attr_name.to_s
      if key =~ NCNAME_RE
        prefix = key.split(":", 2)[1]
        element.add_namespace_definition prefix, attr_value
      else
        element[key] = attr_value.to_s
      end
    end
  end

  element
end
- create_entity(p1, p2 = v2, p3 = v3, p4 = v4, p5 = v5) Show Source
-
Create a new entity named name.
type is an integer representing the type of entity to be created, and it defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL. See the constants on Nokogiri::XML::EntityDecl for more information.
external_id, system_id, and content set the External ID, System ID, and content respectively. All of these parameters are optional.
-
/*
 * Create a new entity declaration on this document.
 *
 * Arguments: name (required), then optional type, external_id, system_id and
 * content.  A nil type becomes XML_INTERNAL_GENERAL_ENTITY; nil strings are
 * passed to libxml2 as NULL.  Raises when xmlAddDocEntity returns NULL.
 */
static VALUE create_entity(int argc, VALUE *argv, VALUE self)
{
  VALUE name, type, external_id, system_id, content;
  xmlDocPtr c_doc;
  xmlEntityPtr entity;
  xmlErrorPtr last_error;
  xmlChar *c_name = NULL;
  xmlChar *c_external_id = NULL;
  xmlChar *c_system_id = NULL;
  xmlChar *c_content = NULL;
  int c_type = XML_INTERNAL_GENERAL_ENTITY;

  Data_Get_Struct(self, xmlDoc, c_doc);

  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,
               &content);

  xmlResetLastError();

  if (!NIL_P(name)) {
    c_name = (xmlChar *)StringValuePtr(name);
  }
  if (!NIL_P(type)) {
    c_type = (int)NUM2INT(type);
  }
  if (!NIL_P(external_id)) {
    c_external_id = (xmlChar *)StringValuePtr(external_id);
  }
  if (!NIL_P(system_id)) {
    c_system_id = (xmlChar *)StringValuePtr(system_id);
  }
  if (!NIL_P(content)) {
    c_content = (xmlChar *)StringValuePtr(content);
  }

  entity = xmlAddDocEntity(c_doc, c_name, c_type, c_external_id, c_system_id,
                           c_content);

  if (entity != NULL) {
    return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)entity);
  }

  last_error = xmlGetLastError();
  if (last_error) {
    rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));
  } else {
    rb_raise(rb_eRuntimeError, "Could not create entity");
  }

  return Qnil;
}
- create_text_node(string, &block) Show Source
-
# File lib/nokogiri/xml/document.rb, line 84
#
# Build a Nokogiri::XML::Text node owned by this document from
# +string+ (converted with to_s).  The block is passed on to Text.new.
def create_text_node string, &block
  text = string.to_s
  Nokogiri::XML::Text.new(text, self, &block)
end
- decorate(node) Show Source
-
Apply any decorators to node
-
# File lib/nokogiri/xml/document.rb, line 184
#
# Extend +node+ with every module registered for a class that +node+ is an
# instance of.  Does nothing when no decorators have been registered.
def decorate node
  return unless @decorators

  @decorators.each do |klass, list|
    if node.is_a?(klass)
      list.each { |mod| node.extend(mod) }
    end
  end
end
- decorators(key) Show Source
-
Get the list of decorators given key
-
# File lib/nokogiri/xml/document.rb, line 144
#
# Return the decorator list stored under +key+, creating the backing hash
# and an empty list on first use.
def decorators key
  @decorators = Hash.new unless @decorators
  @decorators[key] = [] unless @decorators[key]
  @decorators[key]
end
- document() Show Source
-
A reference to self
-
# File lib/nokogiri/xml/document.rb, line 104
#
# A document is its own document: returns the receiver.
def document
  return self
end
- dup(p1 = v1) Show Source
-
Copy this Document. An optional depth may be passed in, but it defaults to a deep copy. 0 is a shallow copy, 1 is a deep copy.
-
/*
 * Copy this document.
 *
 * Takes one optional argument, the level handed to xmlCopyDoc; it defaults
 * to 1 when no argument is given.  Returns nil when xmlCopyDoc fails,
 * otherwise the copy wrapped in the receiver's class.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  VALUE level;
  xmlDocPtr source;
  xmlDocPtr copy;
  int supplied;

  supplied = rb_scan_args(argc, argv, "01", &level);
  if (supplied == 0) {
    level = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, source);

  copy = xmlCopyDoc(source, (int)NUM2INT(level));
  if (copy == NULL) {
    return Qnil;
  }

  /* Carry the node type over from the source document. */
  copy->type = source->type;

  return Nokogiri_wrap_xml_document(rb_obj_class(self), copy);
}
- encoding() Show Source
-
Get the encoding for this Document
-
/*
 * Get the encoding for this Document, or nil when none is set.
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (c_doc->encoding) {
    return NOKOGIRI_STR_NEW2(c_doc->encoding);
  }

  return Qnil;
}
- encoding=(p1) Show Source
-
Set the encoding string for this Document
-
/*
 * Set the encoding string for this Document.
 *
 * encoding must be convertible to a String; its contents are copied into the
 * document.  Returns the encoding argument.
 */
static VALUE set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  xmlChar *new_encoding;

  Data_Get_Struct(self, xmlDoc, doc);

  /* Copy first: StringValuePtr can raise, and the old value must stay intact
   * (not freed and dangling) if it does. */
  new_encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));

  if (doc->encoding) {
    /* The string belongs to libxml2, so release it with xmlFree, not free(). */
    xmlFree((xmlChar *)doc->encoding);
  }

  doc->encoding = new_encoding;

  return encoding;
}
- fragment(tags = nil) Show Source
-
Create a Nokogiri::XML::DocumentFragment from tags. Returns an empty fragment if tags is nil.
-
# File lib/nokogiri/xml/document.rb, line 203
#
# Build a DocumentFragment for this document from +tags+ (nil by default),
# passing the document's root as the third argument to DocumentFragment.new.
def fragment tags = nil
  context_node = self.root
  DocumentFragment.new(self, tags, context_node)
end
- name() Show Source
-
The name of this document. Always returns “document”
-
# File lib/nokogiri/xml/document.rb, line 99
#
# The node name of a document is the fixed string 'document'.
def name
  return 'document'
end
- namespaces() Show Source
-
Get the hash of namespaces on the root Nokogiri::XML::Node
-
# File lib/nokogiri/xml/document.rb, line 196
#
# Namespaces of the root node, or an empty hash when there is no root.
def namespaces
  if root
    root.namespaces
  else
    {}
  end
end
- remove_namespaces!() Show Source
-
Remove all namespaces from all nodes in the document.
This could be useful for developers who either don’t understand namespaces or don’t care about them.
The following example shows a use case, and you can decide for yourself whether this is a good thing or not:
doc = Nokogiri::XML <<-EOXML <root> <car xmlns:part="http://general-motors.com/"> <part:tire>Michelin Model XGV</part:tire> </car> <bicycle xmlns:part="http://schwinn.com/"> <part:tire>I'm a bicycle tire!</part:tire> </bicycle> </root> EOXML doc.xpath("//tire").to_s # => "" doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>" doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>" doc.remove_namespaces! doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>" doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "" doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
For more information on why this probably is not a good thing in general, please direct your browser to tenderlovemaking.com/2009/04/23/namespaces-in-xml/
-
/*
 * Remove all namespaces from all nodes in the document.
 *
 * Hands the document itself, as a node, to
 * recursively_remove_namespaces_from_node and returns self.
 */
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr c_doc;
  xmlNodePtr start;

  Data_Get_Struct(self, xmlDoc, c_doc);

  start = (xmlNodePtr)c_doc;
  recursively_remove_namespaces_from_node(start);

  return self;
}
- root() Show Source
-
Get the root node for this document.
-
/*
 * Get the root node for this document, or nil when it has no root element.
 */
static VALUE root(VALUE self)
{
  xmlDocPtr c_doc;
  xmlNodePtr root_element;

  Data_Get_Struct(self, xmlDoc, c_doc);

  root_element = xmlDocGetRootElement(c_doc);
  if (root_element) {
    return Nokogiri_wrap_xml_node(Qnil, root_element);
  }

  return Qnil;
}
- root=(p1) Show Source
-
Set the root element on this document
-
/*
 * Set the root element on this document.
 *
 * Passing nil unlinks the current root element (if any).  Passing a node
 * makes it the root; a node belonging to a different document is first
 * copied into this one.  Returns the root argument in all cases.
 * Raises RuntimeError when the cross-document copy fails.
 */
static VALUE set_root(VALUE self, VALUE root)
{
  xmlDocPtr doc;
  xmlNodePtr new_root;
  xmlNodePtr old_root;

  Data_Get_Struct(self, xmlDoc, doc);

  old_root = NULL;

  /* nil: detach the existing root and hand it to NOKOGIRI_ROOT_NODE. */
  if(NIL_P(root)) {
    old_root = xmlDocGetRootElement(doc);

    if(old_root) {
      xmlUnlinkNode(old_root);
      NOKOGIRI_ROOT_NODE(old_root);
    }

    return root;
  }

  Data_Get_Struct(root, xmlNode, new_root);

  /* If the new root's document is not the same as the current document,
   * then we need to dup the node in to this document.
   *
   * NOTE(review): old_root is only looked up on this cross-document path.
   * When new_root already belongs to doc, old_root stays NULL and the element
   * displaced by xmlDocSetRootElement is never passed to NOKOGIRI_ROOT_NODE.
   * Confirm this is intended. */
  if(new_root->doc != doc) {
    old_root = xmlDocGetRootElement(doc);
    if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  xmlDocSetRootElement(doc, new_root);
  if(old_root) NOKOGIRI_ROOT_NODE(old_root);
  return root;
}
- slop!() Show Source
-
Explore a document with shortcut methods. See Nokogiri::Slop for details.
Note that any nodes that have been instantiated before slop! is called will not be decorated with sloppy behavior. So, if you’re in irb, the preferred idiom is:
irb> doc = Nokogiri::Slop my_markup
and not
irb> doc = Nokogiri::HTML my_markup ... followed by irb's implicit inspect (and therefore instantiation of every node) ... irb> doc.slop! ... which does absolutely nothing.
-
# File lib/nokogiri/xml/document.rb, line 173
#
# Register Nokogiri::Decorators::Slop for XML::Node (once) and call
# +decorate!+.  Returns self whether or not anything was registered.
def slop!
  if !decorators(XML::Node).include?(Nokogiri::Decorators::Slop)
    decorators(XML::Node).push(Nokogiri::Decorators::Slop)
    decorate!
  end

  self
end
- to_java() Show Source
-
JRuby only. Returns Java’s org.w3c.dom.Document for this Document.
-
# File lib/nokogiri/xml/document.rb, line 234
#
# Return the result of toJavaDocument for this document.
# Raises unless Nokogiri.jruby? is true.
def to_java
  unless Nokogiri.jruby?
    raise "JRuby only method"
  end

  toJavaDocument()
end
- url() Show Source
-
Get the url name for this document.
-
/*
 * Get the url name for this document, or nil when none is set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (!c_doc->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(c_doc->URL);
}
- validate() Show Source
-
Validate this Document against its DTD. Returns a list of errors on the document or nil when there is no DTD.
-
# File lib/nokogiri/xml/document.rb, line 152
#
# Validate this document with its internal subset.  Returns nil when there is
# no internal subset, otherwise whatever internal_subset.validate returns.
def validate
  if internal_subset
    internal_subset.validate(self)
  else
    nil
  end
end
- version() Show Source
-
/*
 * Get the XML version for this Document, or nil when none is set.
 */
static VALUE version(VALUE self)
{
  xmlDocPtr c_doc;

  Data_Get_Struct(self, xmlDoc, c_doc);

  if (c_doc->version) {
    return NOKOGIRI_STR_NEW2(c_doc->version);
  }

  return Qnil;
}