Class Nokogiri::XML::Document inherits from Node
Nokogiri::XML::Document is the main entry point for dealing with XML documents. The Document is created by parsing an XML document. See Nokogiri.XML()
For searching a Document, see Nokogiri::XML::Node#css and Nokogiri::XML::Node#xpath
Attributes
- errors RW
A list of Nokogiri::XML::SyntaxError found when parsing a document
Public Class Methods
- new(...) Show Source
Create a new document with version (defaults to "1.0")
-
/*
 * Allocate a fresh libxml2 document and wrap it in a Ruby object of +klass+.
 *
 * The first positional argument, when present, is used as the XML version
 * string; when it is absent (or nil) the version falls back to "1.0".
 * The full, unmodified argument list is then forwarded to the Ruby-level
 * initializer via rb_obj_call_init.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE args;
  VALUE xml_version;
  VALUE wrapped;
  xmlDocPtr c_doc;

  /* "0*": no required arguments, everything is collected into +args+. */
  rb_scan_args(argc, argv, "0*", &args);

  xml_version = rb_ary_entry(args, (long)0);
  if (NIL_P(xml_version)) {
    xml_version = rb_str_new2("1.0");
  }

  c_doc = xmlNewDoc((xmlChar *)StringValuePtr(xml_version));
  wrapped = Nokogiri_wrap_xml_document(klass, c_doc);
  rb_obj_call_init(wrapped, argc, argv);

  return wrapped;
}
- new(*args) Show Source
-
# File lib/nokogiri/xml/document.rb, line 39
#
# Accepts (and ignores) any arguments; a new document starts out with no
# decorators registered.
def initialize(*args)
  @decorators = nil
end
- parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block) Show Source
Parse an XML file. string_or_io may be a String, or any object that responds to read and close such as an IO, or StringIO. url is the resource where this document is located. encoding is the encoding that should be used when processing the document. options is a number that sets options in the parser, such as Nokogiri::XML::ParseOptions::RECOVER. See the constants in Nokogiri::XML::ParseOptions.
-
# File lib/nokogiri/xml/document.rb, line 19
#
# Parse XML from +string_or_io+ and return the resulting document.
#
# string_or_io:: a String, or any object responding to +read+ (IO-like).
# url::          location of the document; when nil and the input responds
#                to +path+, the input's path is used instead.
# encoding::     encoding name handed to the reader, or nil.
# options::      an Integer bit mask or an options object responding to
#                +to_i+. Integers are wrapped in a ParseOptions instance.
#
# If a block is given it is yielded the options object before parsing, so
# the caller can adjust it. A nil or empty string yields a new, empty
# document instead of being handed to read_memory.
def self.parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
  # Integer (not Fixnum): Fixnum was deprecated in Ruby 2.4 and removed in
  # 3.2, where referencing it raises NameError.
  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)

  # Give the options to the user
  yield options if block_given?

  if string_or_io.respond_to?(:read)
    url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
    return read_io(string_or_io, url, encoding, options.to_i)
  end

  # read_memory pukes on empty docs
  return new if string_or_io.nil? || string_or_io.empty?

  read_memory(string_or_io, url, encoding, options.to_i)
end
- read_io(p1, p2, p3, p4) Show Source
Create a new document from an IO object
-
/*
 * Build a document of class +klass+ by reading XML from the Ruby IO-like
 * object +io+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to libxml2.
 * +options+ is converted with NUM2INT and passed as the parser option mask.
 *
 * While parsing, structured errors are routed to Nokogiri_error_array_pusher
 * with a fresh Ruby array as its context; that array is stored on the
 * returned document as @errors.
 *
 * Raises the wrapped libxml2 error when parsing yields no document, or a
 * RuntimeError ("Could not parse document") when libxml2 reports no error.
 */
static VALUE read_io(VALUE klass, VALUE io, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_url = NIL_P(url) ? NULL : StringValuePtr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  VALUE error_list = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      (int)NUM2INT(options)
  );

  /* Always detach the handler so later libxml2 calls don't write to our array. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    /* Nothing to free here: doc is NULL. Both raise calls below do not return. */
    xmlErrorPtr error = xmlGetLastError();
    if (error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));

    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
- read_memory(p1, p2, p3, p4) Show Source
Create a new document from a String
-
/*
 * Build a document of class +klass+ by parsing the bytes of the Ruby String
 * +string+.
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to libxml2.
 * +options+ is converted with NUM2INT and passed as the parser option mask.
 *
 * While parsing, structured errors are routed to Nokogiri_error_array_pusher
 * with a fresh Ruby array as its context; that array is stored on the
 * returned document as @errors.
 *
 * Raises the wrapped libxml2 error when parsing yields no document, or a
 * RuntimeError ("Could not parse document") when libxml2 reports no error.
 */
static VALUE read_memory(VALUE klass, VALUE string, VALUE url, VALUE encoding, VALUE options)
{
  const char *c_buffer = StringValuePtr(string);
  const char *c_url = NIL_P(url) ? NULL : StringValuePtr(url);
  const char *c_enc = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* xmlReadMemory takes an int size; make the narrowing from long explicit. */
  int len = (int)RSTRING_LEN(string);
  VALUE error_list = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));

  /* Always detach the handler so later libxml2 calls don't write to our array. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    /* Nothing to free here: doc is NULL. Both raise calls below do not return. */
    xmlErrorPtr error = xmlGetLastError();
    if (error)
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));

    rb_raise(rb_eRuntimeError, "Could not parse document");
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;
}
Public Instance Methods
- <<(child)
Alias for add_child
- add_child(child) Show Source
-
# File lib/nokogiri/xml/document.rb, line 145
#
# Add +child+ as the root of this document.
#
# Raises if the document already has a root. When +child+ is a document
# fragment, its single child is added instead; a fragment with more than
# one child is rejected because a document can only have one root.
def add_child(child)
  raise "Document already has a root node" if root

  # Anything that is not a fragment goes straight to the inherited method.
  return super unless child.type == Node::DOCUMENT_FRAG_NODE

  fragment_children = child.children
  raise "Document cannot have multiple root nodes" if fragment_children.size > 1

  super(fragment_children.first)
end
- clone(...)
Alias for dup
- collect_namespaces() Show Source
Recursively get all namespaces from this node and its subtree and return them as a hash.
For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:hello="world" /> </root>
This method will return:
{ 'xmlns:foo' => 'bar', 'xmlns:hello' => 'world' }
WARNING: this method will clobber duplicate names in the keys. For example, given this document:
<root xmlns:foo="bar"> <bar xmlns:foo="baz" /> </root>
The hash returned will look like this: { 'xmlns:foo' => 'bar' }
-
# File lib/nokogiri/xml/document.rb, line 85
#
# Walk every node yielded by +traverse+ and merge each node's namespaces
# into a single Hash, which is returned. When two nodes declare the same
# key, the one visited later overwrites the earlier one.
def collect_namespaces
  {}.tap do |collected|
    traverse { |node| collected.update(node.namespaces) }
  end
end
- create_element(name, &block) Show Source
Create an element with name
-
# File lib/nokogiri/xml/document.rb, line 44
#
# Build a new Nokogiri::XML::Element called +name+ that belongs to this
# document. Any block given is handed on to Element.new.
def create_element(name, &block)
  element_class = Nokogiri::XML::Element
  element_class.new(name, self, &block)
end
- create_text_node(text, &block) Show Source
Create a text node with text
-
# File lib/nokogiri/xml/document.rb, line 49
#
# Build a new Nokogiri::XML::Text node owned by this document. +text+ is
# converted with +to_s+ first, so non-String values are accepted. Any block
# given is handed on to Text.new.
def create_text_node(text, &block)
  content = text.to_s
  Nokogiri::XML::Text.new(content, self, &block)
end
- decorate(node) Show Source
Apply any decorators to node
-
# File lib/nokogiri/xml/document.rb, line 118
#
# Extend +node+ with every decorator module registered for a class that
# +node+ is an instance of. Does nothing (and returns nil) when no
# decorators have been registered on this document.
def decorate(node)
  return unless @decorators

  @decorators.each do |klass, modules|
    modules.each { |mod| node.extend(mod) } if node.is_a?(klass)
  end
end
- decorators(key) Show Source
Get the list of decorators given key
-
# File lib/nokogiri/xml/document.rb, line 92
#
# Return the list of decorators registered under +key+, creating the
# registry and/or an empty list for +key+ on first use. The returned Array
# is the stored one, so appending to it registers a decorator.
def decorators(key)
  registry = (@decorators ||= {})
  registry[key] ||= []
end
- document() Show Source
A reference to self
-
# File lib/nokogiri/xml/document.rb, line 59
#
# A document is its own owning document, so this simply returns the
# receiver.
def document
  self
end
- dup(...) Show Source
Copy this Document. An optional depth may be passed in, but it defaults to a deep copy. 0 is a shallow copy, 1 is a deep copy.
-
/*
 * Copy this document with xmlCopyDoc and wrap the copy in an object of the
 * receiver's class.
 *
 * Takes one optional argument that is passed to xmlCopyDoc as its copy
 * level; it defaults to 1 when omitted. Returns nil when xmlCopyDoc fails.
 */
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
{
  VALUE depth;
  xmlDocPtr original;
  xmlDocPtr copy;

  /* "01": zero required, one optional argument. */
  if (rb_scan_args(argc, argv, "01", &depth) == 0) {
    depth = INT2NUM((long)1);
  }

  Data_Get_Struct(self, xmlDoc, original);

  copy = xmlCopyDoc(original, (int)NUM2INT(depth));
  if (copy == NULL) {
    return Qnil;
  }

  /* Carry the node type over explicitly so the copy matches the source. */
  copy->type = original->type;

  return Nokogiri_wrap_xml_document(rb_obj_class(self), copy);
}
- encoding() Show Source
Get the encoding for this Document
-
/*
 * Return the document's encoding as a Ruby string, or nil when the
 * underlying xmlDoc has no encoding set.
 */
static VALUE encoding(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->encoding) {
    return NOKOGIRI_STR_NEW2(doc->encoding);
  }

  return Qnil;
}
- encoding=(p1) Show Source
Set the encoding string for this Document
-
/*
 * Replace the document's encoding with a copy of the Ruby string +encoding+
 * and return +encoding+.
 *
 * The previous encoding string, if any, is released so that repeated
 * assignments do not leak memory.
 */
static VALUE set_encoding(VALUE self, VALUE encoding)
{
  xmlDocPtr doc;
  xmlChar *new_encoding;

  Data_Get_Struct(self, xmlDoc, doc);

  /*
   * Build the replacement first: StringValuePtr may raise for a non-String
   * argument, and the document must not be left pointing at freed memory.
   */
  new_encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));

  if (doc->encoding) {
    /* The field is const-qualified in xmlDoc; cast it away to release it. */
    xmlFree((xmlChar *)doc->encoding);
  }

  doc->encoding = new_encoding;

  return encoding;
}
- fragment(tags = nil) Show Source
Create a Nokogiri::XML::DocumentFragment from tags Returns an empty fragment if tags is nil.
-
# File lib/nokogiri/xml/document.rb, line 137
#
# Build a DocumentFragment attached to this document from +tags+.
# +tags+ defaults to nil and is passed to DocumentFragment.new unchanged.
def fragment(tags = nil)
  owner = self
  DocumentFragment.new(owner, tags)
end
- name() Show Source
The name of this document. Always returns "document"
-
# File lib/nokogiri/xml/document.rb, line 54
#
# The node name of a document is the fixed string 'document'.
def name
  'document'
end
- namespaces() Show Source
Get the hash of namespaces on the root Nokogiri::XML::Node
-
# File lib/nokogiri/xml/document.rb, line 130
#
# Namespaces declared on the root node, or an empty Hash when the document
# has no root.
def namespaces
  return {} unless root

  root.namespaces
end
- remove_namespaces!() Show Source
Remove all namespaces from all nodes in the document.
This could be useful for developers who either don’t understand namespaces or don’t care about them.
The following example shows a use case, and you can decide for yourself whether this is a good thing or not:
doc = Nokogiri::XML <<-EOXML
  <root>
    <car xmlns:part="http://general-motors.com/">
      <part:tire>Michelin Model XGV</part:tire>
    </car>
    <bicycle xmlns:part="http://schwinn.com/">
      <part:tire>I'm a bicycle tire!</part:tire>
    </bicycle>
  </root>
EOXML
doc.xpath("//tire").to_s # => ""
doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
doc.remove_namespaces!
doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
For more information on why this probably is not a good thing in general, please direct your browser to tenderlovemaking.com/2009/04/23/namespaces-in-xml/
-
/*
 * Strip namespaces from the whole document by handing the document node to
 * recursively_remove_namespaces_from_node, then return the receiver.
 */
VALUE remove_namespaces_bang(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  /* The document itself is passed as the starting node. */
  recursively_remove_namespaces_from_node((xmlNodePtr)doc);

  return self;
}
- root() Show Source
Get the root node for this document.
-
/*
 * Return the wrapped root element of this document, or nil when
 * xmlDocGetRootElement reports that there is none.
 */
static VALUE root(VALUE self)
{
  xmlDocPtr doc;
  xmlNodePtr root_node;

  Data_Get_Struct(self, xmlDoc, doc);

  root_node = xmlDocGetRootElement(doc);
  if (root_node == NULL) {
    return Qnil;
  }

  return Nokogiri_wrap_xml_node(Qnil, root_node);
}
- root=(p1) Show Source
Set the root element on this document
-
/*
 * Make the node wrapped by +root+ the root element of this document and
 * return +root+.
 *
 * A node that belongs to a different document is deep-copied into this
 * document first; a RuntimeError is raised if that copy fails.
 */
static VALUE set_root(VALUE self, VALUE root)
{
  xmlDocPtr doc;
  xmlNodePtr new_root;
  xmlNodePtr displaced_root = NULL;

  Data_Get_Struct(self, xmlDoc, doc);
  Data_Get_Struct(root, xmlNode, new_root);

  /* A node owned by another document cannot be linked in directly, so it
   * is duplicated into this document. The current root is remembered only
   * in this case. */
  if (new_root->doc != doc) {
    displaced_root = xmlDocGetRootElement(doc);

    new_root = xmlDocCopyNode(new_root, doc, 1);
    if (new_root == NULL) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");
    }
  }

  xmlDocSetRootElement(doc, new_root);

  /* NOTE(review): NOKOGIRI_ROOT_NODE is defined elsewhere; presumably it
   * records the detached node so it can be cleaned up later - confirm. */
  if (displaced_root) {
    NOKOGIRI_ROOT_NODE(displaced_root);
  }

  return root;
}
- slop!() Show Source
Explore a document with shortcut methods.
-
# File lib/nokogiri/xml/document.rb, line 107
#
# Register Nokogiri::Decorators::Slop as a decorator for XML::Node and
# re-apply decorators via +decorate!+. Calling it again once Slop is
# registered is a no-op. Always returns the receiver.
def slop!
  node_decorators = decorators(XML::Node)
  return self if node_decorators.include?(Nokogiri::Decorators::Slop)

  node_decorators << Nokogiri::Decorators::Slop
  decorate!
  self
end
- url() Show Source
Get the url name for this document.
-
/*
 * Return the document's URL as a Ruby string, or nil when the underlying
 * xmlDoc has no URL set.
 */
static VALUE url(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (!doc->URL) {
    return Qnil;
  }

  return NOKOGIRI_STR_NEW2(doc->URL);
}
- validate() Show Source
Validate this Document against its DTD. Returns a list of errors on the document or nil when there is no DTD.
-
# File lib/nokogiri/xml/document.rb, line 100
#
# Validate this document against its internal subset. Returns whatever the
# subset's +validate+ returns, or nil when there is no internal subset.
def validate
  internal_subset.validate(self) if internal_subset
end
- version() Show Source
-
/*
 * Return the document's XML version as a Ruby string, or nil when the
 * underlying xmlDoc has no version set.
 */
static VALUE version(VALUE self)
{
  xmlDocPtr doc;

  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->version) {
    return NOKOGIRI_STR_NEW2(doc->version);
  }

  return Qnil;
}