Class Nokogiri::XML::Reader inherits from Object
Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The Reader is given an XML document, and yields nodes to an each block.
Here is an example of usage:
reader = Nokogiri::XML::Reader(<<-eoxml)
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
  </x>
eoxml

reader.each do |node|
  # node is an instance of Nokogiri::XML::Reader
  puts node.name
end
Note that Nokogiri::XML::Reader#each can only be called once!! Once the cursor moves through the entire document, you must parse the document again. So make sure that you capture any information you need during the first iteration.
The Reader parser is good for when you need the speed of a SAX parser, but do not want to write a Document handler.
Constants
- TYPE_ATTRIBUTE
Attribute node type
- TYPE_CDATA
CDATA node type
- TYPE_COMMENT
Comment node type
- TYPE_DOCUMENT
Document node type
- TYPE_DOCUMENT_FRAGMENT
Document Fragment node type
- TYPE_DOCUMENT_TYPE
Document Type node type
- TYPE_ELEMENT
Element node type
- TYPE_END_ELEMENT
Element end node type
- TYPE_END_ENTITY
Entity end node type
- TYPE_ENTITY
Entity node type
- TYPE_ENTITY_REFERENCE
Entity Reference node type
- TYPE_NONE
- TYPE_NOTATION
Notation node type
- TYPE_PROCESSING_INSTRUCTION
PI node type
- TYPE_SIGNIFICANT_WHITESPACE
Significant Whitespace node type
- TYPE_TEXT
Text node type
- TYPE_WHITESPACE
Whitespace node type
- TYPE_XML_DECLARATION
XML Declaration node type
Attributes
- encoding R
The encoding for the document
- errors RW
A list of errors encountered while parsing
- source R
The XML source
Public Class Methods
- from_io(p1, p2 = v2, p3 = v3, p4 = v4) Show Source
-
Create a new reader that parses io
-
/*
 * Create a new reader that parses +io+.
 *
 * Takes one required argument (the IO object) and up to three optional
 * ones: url, encoding and options. A falsy io raises ArgumentError; a
 * failure to create the underlying reader raises RuntimeError.
 */
static VALUE from_io(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_io, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_io)) rb_raise(rb_eArgError, "io cannot be nil");
  if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  reader = xmlReaderForIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)rb_io,
      c_url,
      c_encoding,
      c_options
  );

  if (reader == NULL) {
    /* No reader was created, so there is nothing to free on this path. */
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);

  /* Forward io, url and encoding to the Ruby-level initializer. */
  args[0] = rb_io;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
- from_memory(p1, p2 = v2, p3 = v3, p4 = v4) Show Source
-
Create a new reader that parses string
-
/*
 * Create a new reader that parses +string+.
 *
 * Takes one required argument (the buffer) and up to three optional
 * ones: url, encoding and options. A falsy buffer raises ArgumentError; a
 * failure to create the underlying reader raises RuntimeError.
 */
static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_buffer, rb_url, encoding, rb_options;
  xmlTextReaderPtr reader;
  const char * c_url      = NULL;
  const char * c_encoding = NULL;
  int c_options           = 0;
  VALUE rb_reader, args[3];

  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_buffer)) rb_raise(rb_eArgError, "string cannot be nil");
  if (RTEST(rb_url)) c_url = StringValuePtr(rb_url);
  if (RTEST(encoding)) c_encoding = StringValuePtr(encoding);
  if (RTEST(rb_options)) c_options = (int)NUM2INT(rb_options);

  reader = xmlReaderForMemory(
      StringValuePtr(rb_buffer),
      (int)RSTRING_LEN(rb_buffer),
      c_url,
      c_encoding,
      c_options
  );

  if (reader == NULL) {
    /* No reader was created, so there is nothing to free on this path. */
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);

  /* Forward buffer, url and encoding to the Ruby-level initializer. */
  args[0] = rb_buffer;
  args[1] = rb_url;
  args[2] = encoding;
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
Public Instance Methods
- attribute(p1) Show Source
-
Get the value of attribute named name
-
/*
 * Get the value of the attribute named +name+.
 *
 * Returns nil when +name+ is nil or when no value can be found; otherwise
 * returns the value as a Ruby string.
 */
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *attr_value;
  VALUE result;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (NIL_P(name)) {
    return Qnil;
  }
  name = StringValue(name);

  attr_value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValuePtr(name));

  if (attr_value == NULL) {
    /* this section is an attempt to workaround older versions of libxml that
       don't handle namespaces properly in all attribute-and-friends functions */
    xmlChar *ns_prefix = NULL;
    xmlChar *local = xmlSplitQName2((xmlChar *)StringValuePtr(name), &ns_prefix);

    if (local == NULL) {
      attr_value = xmlTextReaderLookupNamespace(reader, ns_prefix);
    } else {
      attr_value = xmlTextReaderLookupNamespace(reader, local);
      xmlFree(local);
    }
    xmlFree(ns_prefix);
  }

  if (attr_value == NULL) {
    return Qnil;
  }

  /* Build the Ruby string first, then release the libxml2 buffer. */
  result = NOKOGIRI_STR_NEW2(attr_value);
  xmlFree(attr_value);
  return result;
}
- attribute_at(p1) Show Source
-
Get the value of attribute at index
-
/*
 * Get the value of the attribute at +index+.
 *
 * Returns nil when +index+ is nil or when the reader yields no value for
 * that position; otherwise returns the value as a Ruby string.
 */
static VALUE attribute_at(VALUE self, VALUE index)
{
  xmlTextReaderPtr reader;
  xmlChar *attr_value;
  VALUE result = Qnil;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (NIL_P(index)) {
    return Qnil;
  }

  /* Coerce to a Ruby Integer before narrowing to a C int. */
  index = rb_Integer(index);
  attr_value = xmlTextReaderGetAttributeNo(reader, (int)NUM2INT(index));

  if (attr_value != NULL) {
    result = NOKOGIRI_STR_NEW2(attr_value);
    xmlFree(attr_value);
  }

  return result;
}
- attribute_count() Show Source
-
Get the number of attributes for the current node
-
/*
 * Get the number of attributes for the current node.
 *
 * A count of -1 from xmlTextReaderAttributeCount is mapped to nil.
 */
static VALUE attribute_count(VALUE self)
{
  xmlTextReaderPtr reader;
  int n_attrs;

  Data_Get_Struct(self, xmlTextReader, reader);
  n_attrs = xmlTextReaderAttributeCount(reader);

  return (n_attrs == -1) ? Qnil : INT2NUM((long)n_attrs);
}
- attribute_nodes() Show Source
-
Get a list of attributes for the current node
-
# File lib/nokogiri/xml/reader.rb, line 97
# Get a list of attributes for the current node. Each returned node gets
# an @_r instance variable pointing back at this reader.
def attribute_nodes
  attr_list = attr_nodes
  attr_list.each do |attr_node|
    attr_node.instance_variable_set(:@_r, self)
  end
  attr_list
end
- attributes() Show Source
-
Get a list of attributes for the current node.
-
# File lib/nokogiri/xml/reader.rb, line 89
# Get the attributes for the current node as a Hash of name => string
# value, merged with the node's namespaces (when there are any).
def attributes
  by_name = attribute_nodes.each_with_object({}) do |node, memo|
    memo[node.name] = node.to_s
  end
  by_name.merge(namespaces || {})
end
- attributes?() Show Source
-
Does this node have attributes?
-
/*
 * Does this node have attributes?
 *
 * Maps the result of has_attributes: 0 => false, 1 => true, anything
 * else => nil.
 */
static VALUE attributes_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (has_attributes(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
- base_uri() Show Source
-
Get the xml:base of the node
-
/*
 * Get the xml:base of the node.
 *
 * Returns nil when the reader reports no base URI; otherwise returns the
 * URI as a Ruby string.
 */
static VALUE base_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *c_base_uri;
  VALUE rb_base_uri;

  Data_Get_Struct(self, xmlTextReader, reader);

  c_base_uri = xmlTextReaderBaseUri(reader);
  if (c_base_uri == NULL) return Qnil;

  /* xmlTextReaderBaseUri hands back a caller-owned string (unlike the
     xmlTextReaderConst* accessors), so release it once it is copied. */
  rb_base_uri = NOKOGIRI_STR_NEW2((const char *)c_base_uri);
  xmlFree(c_base_uri);

  return rb_base_uri;
}
- default?() Show Source
-
Was an attribute generated from the default value in the DTD or schema?
-
/*
 * Was an attribute generated from the default value in the DTD or schema?
 *
 * Maps the result of xmlTextReaderIsDefault: 0 => false, 1 => true,
 * anything else => nil.
 */
static VALUE default_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderIsDefault(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
- depth() Show Source
-
Get the depth of the node
-
/*
 * Get the depth of the node.
 *
 * A depth of -1 from xmlTextReaderDepth is mapped to nil.
 */
static VALUE depth(VALUE self)
{
  xmlTextReaderPtr reader;
  int level;

  Data_Get_Struct(self, xmlTextReader, reader);
  level = xmlTextReaderDepth(reader);

  return (level == -1) ? Qnil : INT2NUM((long)level);
}
- each() Show Source
-
Move the cursor through the document yielding the cursor to the block
-
# File lib/nokogiri/xml/reader.rb, line 105
# Move the cursor through the document, yielding the cursor to the block
# after every successful read. Stops as soon as read returns a falsy value.
def each
  node = read
  while node
    yield node
    node = read
  end
end
- empty_element?() Show Source
-
Returns true if the current node is empty, otherwise false.
-
/*
 * Returns true if the current node is empty, otherwise false.
 */
static VALUE empty_element_p(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  /* xmlTextReaderIsEmptyElement returns 1 for an empty element, 0 when it
     is not, and -1 on error. Only an explicit 1 counts as "empty", so an
     error is no longer reported as true. */
  if (xmlTextReaderIsEmptyElement(reader) == 1) {
    return Qtrue;
  }

  return Qfalse;
}
- inner_xml() Show Source
-
Read the contents of the current node, including child nodes and markup. Returns a utf-8 encoded string.
-
/*
 * Read the contents of the current node, including child nodes and markup.
 * Returns a utf-8 encoded string, or nil when the reader yields nothing.
 */
static VALUE inner_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *xml;
  VALUE rb_xml;

  Data_Get_Struct(self, xmlTextReader, reader);

  xml = xmlTextReaderReadInnerXml(reader);
  if (!xml) {
    return Qnil;
  }

  /* Build the Ruby string first, then release the libxml2 buffer. */
  rb_xml = NOKOGIRI_STR_NEW2((char *)xml);
  xmlFree(xml);

  return rb_xml;
}
- lang() Show Source
-
Get the xml:lang scope within which the node resides.
-
/*
 * Get the xml:lang scope within which the node resides.
 * Returns nil when the reader reports none.
 */
static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *xml_lang;

  Data_Get_Struct(self, xmlTextReader, reader);
  xml_lang = (const char *)xmlTextReaderConstXmlLang(reader);

  return (xml_lang == NULL) ? Qnil : NOKOGIRI_STR_NEW2(xml_lang);
}
- local_name() Show Source
-
Get the local name of the node
-
/*
 * Get the local name of the node.
 * Returns nil when the reader reports none.
 */
static VALUE local_name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *local;

  Data_Get_Struct(self, xmlTextReader, reader);
  local = (const char *)xmlTextReaderConstLocalName(reader);

  return (local == NULL) ? Qnil : NOKOGIRI_STR_NEW2(local);
}
- name() Show Source
-
Get the name of the node. Returns a utf-8 encoded string.
-
/*
 * Get the name of the node. Returns a utf-8 encoded string, or nil when
 * the reader reports no name.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *node_name;

  Data_Get_Struct(self, xmlTextReader, reader);
  node_name = (const char *)xmlTextReaderConstName(reader);

  return (node_name == NULL) ? Qnil : NOKOGIRI_STR_NEW2(node_name);
}
- namespace_uri() Show Source
-
Get the URI defining the namespace associated with the node
-
/*
 * Get the URI defining the namespace associated with the node.
 * Returns nil when the reader reports none.
 */
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_uri = (const char *)xmlTextReaderConstNamespaceUri(reader);

  return (ns_uri == NULL) ? Qnil : NOKOGIRI_STR_NEW2(ns_uri);
}
- namespaces() Show Source
-
Get a hash of namespaces for this Node
-
/*
 * Get a hash of namespaces for this Node.
 *
 * Returns an empty hash when has_attributes reports zero, and nil when
 * the current node cannot be expanded.
 */
static VALUE namespaces(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlNodePtr expanded;
  VALUE ns_hash;

  Data_Get_Struct(self, xmlTextReader, reader);

  ns_hash = rb_hash_new();

  if (has_attributes(reader)) {
    expanded = xmlTextReaderExpand(reader);
    if (expanded == NULL) {
      return Qnil;
    }

    /* Populate the hash from the expanded node. */
    Nokogiri_xml_node_namespaces(expanded, ns_hash);
  }

  return ns_hash;
}
- node_type() Show Source
-
Get the type of the reader's current node
-
/*
 * Get the type of the reader's current node, as an Integer.
 */
static VALUE node_type(VALUE self)
{
  xmlTextReaderPtr reader;
  int type;

  Data_Get_Struct(self, xmlTextReader, reader);
  type = xmlTextReaderNodeType(reader);

  return INT2NUM((long)type);
}
- outer_xml() Show Source
-
Read the current node and its contents, including child nodes and markup. Returns a utf-8 encoded string.
-
/*
 * Read the current node and its contents, including child nodes and markup.
 * Returns a utf-8 encoded string, or nil when the reader yields nothing.
 */
static VALUE outer_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlChar *xml;
  VALUE rb_xml;

  Data_Get_Struct(self, xmlTextReader, reader);

  xml = xmlTextReaderReadOuterXml(reader);
  if (!xml) {
    return Qnil;
  }

  /* Build the Ruby string first, then release the libxml2 buffer. */
  rb_xml = NOKOGIRI_STR_NEW2((char *)xml);
  xmlFree(xml);

  return rb_xml;
}
- prefix() Show Source
-
Get the shorthand reference to the namespace associated with the node.
-
/*
 * Get the shorthand reference to the namespace associated with the node.
 * Returns nil when the reader reports none.
 */
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_prefix;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_prefix = (const char *)xmlTextReaderConstPrefix(reader);

  return (ns_prefix == NULL) ? Qnil : NOKOGIRI_STR_NEW2(ns_prefix);
}
- read() Show Source
-
/*
 * Advance the reader by one xmlTextReaderRead call.
 *
 * Returns self when the read returns 1 and nil when it returns 0. Any
 * other result raises: a wrapped syntax error when libxml2 has a last
 * error recorded, otherwise a RuntimeError carrying the return code.
 * Structured errors raised during the read are pushed onto the object's
 * +errors+ list.
 */
static VALUE read_more(VALUE self)
{
  xmlTextReaderPtr reader;
  xmlErrorPtr last_error;
  VALUE errors;
  int status;

  Data_Get_Struct(self, xmlTextReader, reader);

  errors = rb_funcall(self, rb_intern("errors"), 0);

  /* Install the collecting error handler only for the duration of the read. */
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
  status = xmlTextReaderRead(reader);
  xmlSetStructuredErrorFunc(NULL, NULL);

  switch (status) {
    case 1:
      return self;
    case 0:
      return Qnil;
    default:
      break;
  }

  last_error = xmlGetLastError();
  if (last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
  }
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));

  return Qnil;
}
- state() Show Source
-
Get the state of the reader
-
/*
 * Get the state of the reader, as an Integer.
 */
static VALUE state(VALUE self)
{
  xmlTextReaderPtr reader;
  int read_state;

  Data_Get_Struct(self, xmlTextReader, reader);
  read_state = xmlTextReaderReadState(reader);

  return INT2NUM((long)read_state);
}
- value() Show Source
-
Get the text value of the node if present. Returns a utf-8 encoded string.
-
/*
 * Get the text value of the node if present. Returns a utf-8 encoded
 * string, or nil when the reader reports no value.
 */
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *text;

  Data_Get_Struct(self, xmlTextReader, reader);
  text = (const char *)xmlTextReaderConstValue(reader);

  return (text == NULL) ? Qnil : NOKOGIRI_STR_NEW2(text);
}
- value?() Show Source
-
Does this node have a text value?
-
/*
 * Does this node have a text value?
 *
 * Maps the result of xmlTextReaderHasValue: 0 => false, 1 => true,
 * anything else => nil.
 */
static VALUE value_eh(VALUE self)
{
  xmlTextReaderPtr reader;

  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderHasValue(reader)) {
    case 0:
      return Qfalse;
    case 1:
      return Qtrue;
    default:
      return Qnil;
  }
}
- xml_version() Show Source
-
Get the XML version of the document being read
-
/*
 * Get the XML version of the document being read.
 * Returns nil when the reader reports none.
 */
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *doc_version;

  Data_Get_Struct(self, xmlTextReader, reader);
  doc_version = (const char *)xmlTextReaderConstXmlVersion(reader);

  return (doc_version == NULL) ? Qnil : NOKOGIRI_STR_NEW2(doc_version);
}