Install
sudo gem install nokogiri
Contribute
github.com/tenderlove/nokogiri

An HTML, XML, SAX, & Reader parser with the ability to search documents via XPath or CSS3 selectors… and much more

Nokogiri

Class Nokogiri::XML::Reader inherits from Object

Nokogiri::XML::Reader parses an XML document similar to the way a cursor would move. The Reader is given an XML document, and yields nodes to an each block.

Here is an example of usage:

# Build a reader over an inline XML document supplied as a heredoc.
reader = Nokogiri::XML::Reader(<<-eoxml)
  <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
  </x>
eoxml

# Walk the document; the block is invoked once per node the cursor reaches.
reader.each do |node|

  # node is an instance of Nokogiri::XML::Reader
  puts node.name

end

Note that Nokogiri::XML::Reader#each can only be called once! Once the cursor has moved through the entire document, you must parse the document again, so make sure that you capture any information you need during the first iteration.

The Reader parser is good for when you need the speed of a SAX parser, but do not want to write a Document handler.

Attributes

encoding R

The encoding for the document

errors RW

A list of errors encountered while parsing

source R

The XML source

Public Class Methods

from_io(...) Show Source

Create a new reader that parses io

/*
 * Create a new reader that parses an IO object.
 *
 * Ruby-level arguments (one required, three optional):
 *   io       - object handed to the libxml2 read/close callbacks; must be truthy
 *   url      - optional base URL string
 *   encoding - optional encoding name string
 *   options  - optional integer of parser options
 *
 * Raises ArgumentError when io is nil/false and RuntimeError when libxml2
 * cannot create the reader.  Returns the wrapped reader after running
 * #initialize with (io, url, encoding).
 */
static VALUE from_io(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_io, rb_url, encoding, rb_options;
  const char *c_url = NULL;
  const char *c_encoding = NULL;
  int c_options = 0;

  rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_io)) {
    rb_raise(rb_eArgError, "io cannot be nil");
  }
  if (RTEST(rb_url)) {
    c_url = StringValuePtr(rb_url);
  }
  if (RTEST(encoding)) {
    c_encoding = StringValuePtr(encoding);
  }
  if (RTEST(rb_options)) {
    c_options = NUM2INT(rb_options);
  }

  xmlTextReaderPtr reader = xmlReaderForIO(
      (xmlInputReadCallback)io_read_callback,
      (xmlInputCloseCallback)io_close_callback,
      (void *)rb_io,
      c_url,
      c_encoding,
      c_options
  );

  /* Nothing was allocated on failure, so there is nothing to free here. */
  if (reader == NULL) {
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  /* dealloc is registered as the free function for the wrapped reader. */
  VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  VALUE args[3] = {rb_io, rb_url, encoding};
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}
from_memory(...) Show Source

Create a new reader that parses string

/*
 * Create a new reader that parses an in-memory string.
 *
 * Ruby-level arguments (one required, three optional):
 *   string   - the XML source; must be truthy and convertible to String
 *   url      - optional base URL string
 *   encoding - optional encoding name string
 *   options  - optional integer of parser options
 *
 * Raises ArgumentError when string is nil/false and RuntimeError when
 * libxml2 cannot create the reader.  Returns the wrapped reader after
 * running #initialize with (string, url, encoding).
 */
static VALUE from_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE rb_buffer, rb_url, encoding, rb_options;
  const char *c_url = NULL;
  const char *c_encoding = NULL;
  int c_options = 0;

  rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options);

  if (!RTEST(rb_buffer)) {
    rb_raise(rb_eArgError, "string cannot be nil");
  }
  if (RTEST(rb_url)) {
    c_url = StringValuePtr(rb_url);
  }
  if (RTEST(encoding)) {
    c_encoding = StringValuePtr(encoding);
  }
  if (RTEST(rb_options)) {
    c_options = NUM2INT(rb_options);
  }

  /*
   * Convert the buffer to a String *before* reading its pointer and length.
   * Doing both inside one argument list leaves the evaluation order
   * unspecified, so the length could be read from a not-yet-converted object.
   */
  StringValue(rb_buffer);

  xmlTextReaderPtr reader = xmlReaderForMemory(
      RSTRING_PTR(rb_buffer),
      (int)RSTRING_LEN(rb_buffer),
      c_url,
      c_encoding,
      c_options
  );

  /* Nothing was allocated on failure, so there is nothing to free here. */
  if (reader == NULL) {
    rb_raise(rb_eRuntimeError, "couldn't create a parser");
  }

  /* dealloc is registered as the free function for the wrapped reader. */
  VALUE rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader);
  VALUE args[3] = {rb_buffer, rb_url, encoding};
  rb_obj_call_init(rb_reader, 3, args);

  return rb_reader;
}

Public Instance Methods

attribute(p1) Show Source

Get the value of attribute named name

/*
 * Get the value of the attribute called +name+ on the current node.
 *
 * Returns nil when +name+ is nil or when neither the attribute lookup nor
 * the namespace fallback yields a value; otherwise returns a Ruby string
 * built from the libxml2 result, which is freed afterwards.
 */
static VALUE reader_attribute(VALUE self, VALUE name)
{
  xmlTextReaderPtr reader;
  xmlChar *attr_value;

  Data_Get_Struct(self, xmlTextReader, reader);

  if (NIL_P(name)) {
    return Qnil;
  }
  name = StringValue(name);

  attr_value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValuePtr(name));

  if (!attr_value) {
    /* this section is an attempt to workaround older versions of libxml that
       don't handle namespaces properly in all attribute-and-friends functions */
    xmlChar *ns_prefix = NULL;
    xmlChar *local = xmlSplitQName2((xmlChar *)StringValuePtr(name), &ns_prefix);

    /* Look up by the local part when the name split, else by the prefix. */
    attr_value = xmlTextReaderLookupNamespace(reader, local ? local : ns_prefix);

    if (local) {
      xmlFree(local);
    }
    xmlFree(ns_prefix);
  }

  if (!attr_value) {
    return Qnil;
  }

  VALUE result = NOKOGIRI_STR_NEW2(attr_value);
  xmlFree(attr_value);
  return result;
}
attribute_at(p1) Show Source

Get the value of attribute at index

/*
 * Get the value of the attribute at position +index+ on the current node.
 *
 * Returns nil when +index+ is nil or libxml2 reports no attribute at that
 * position; otherwise returns a Ruby string and frees the libxml2 copy.
 */
static VALUE attribute_at(VALUE self, VALUE index)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  if (NIL_P(index)) {
    return Qnil;
  }

  /* Coerce the argument to an Integer before narrowing it to a C int. */
  index = rb_Integer(index);
  int position = NUM2INT(index);

  xmlChar *attr_value = xmlTextReaderGetAttributeNo(reader, position);
  if (!attr_value) {
    return Qnil;
  }

  VALUE result = NOKOGIRI_STR_NEW2(attr_value);
  xmlFree(attr_value);
  return result;
}
attribute_count() Show Source

Get the number of attributes for the current node

/*
 * Get the number of attributes on the current node.
 *
 * Returns nil when libxml2 reports -1, otherwise the count as an Integer.
 */
static VALUE attribute_count(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  int total = xmlTextReaderAttributeCount(reader);

  return (total == -1) ? Qnil : INT2NUM((long)total);
}
attribute_nodes() Show Source

Get a list of attributes for the current node

# File lib/nokogiri/xml/reader.rb, line 59
# Get a list of attributes for the current node.  Every node returned by
# attr_nodes gets this reader stored in its @_r instance variable
# (presumably so the node keeps a reference back to its reader -- confirm).
def attribute_nodes
  attrs = attr_nodes
  attrs.each do |attr_node|
    attr_node.instance_variable_set(:@_r, self)
  end
  attrs
end
attributes() Show Source

Get a hash of attribute names and values for the current node, merged with the node's namespaces.

# File lib/nokogiri/xml/reader.rb, line 51
# Build a Hash mapping each attribute name to the attribute's string form
# (node.to_s) for the current node, then merge in the namespaces hash
# (an empty hash is used when namespaces returns nil).
def attributes
  name_value_pairs = attribute_nodes.map do |attr_node|
    [attr_node.name, attr_node.to_s]
  end
  attr_hash = Hash[*name_value_pairs.flatten]
  attr_hash.merge(namespaces || {})
end
attributes?() Show Source

Does this node have attributes?

/*
 * Does the current node have attributes?
 *
 * Maps the result of has_attributes(): 1 -> true, 0 -> false,
 * anything else -> nil.
 */
static VALUE attributes_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  switch (has_attributes(reader)) {
  case 0:
    return Qfalse;
  case 1:
    return Qtrue;
  default:
    return Qnil;
  }
}
default?() Show Source

Was an attribute generated from the default value in the DTD or schema?

/*
 * Was the attribute generated from a default value in the DTD or schema?
 *
 * Maps the result of xmlTextReaderIsDefault(): 1 -> true, 0 -> false,
 * anything else -> nil.
 */
static VALUE default_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderIsDefault(reader)) {
  case 0:
    return Qfalse;
  case 1:
    return Qtrue;
  default:
    return Qnil;
  }
}
depth() Show Source

Get the depth of the node

/*
 * Get the depth of the current node.
 *
 * Returns nil when libxml2 reports -1, otherwise the depth as an Integer.
 */
static VALUE depth(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  int level = xmlTextReaderDepth(reader);

  return (level == -1) ? Qnil : INT2NUM((long)level);
}
each(&block) Show Source

Move the cursor through the document yielding each node to the block

# File lib/nokogiri/xml/reader.rb, line 67
# Move the cursor through the document, calling the block with every value
# returned by #read until #read returns nil (or false).
def each(&block)
  cursor = self.read
  while cursor
    block.call(cursor)
    cursor = self.read
  end
end
inner_xml() Show Source

Read the contents of the current node, including child nodes and markup.

/*
 * Read the contents of the current node, including child nodes and markup.
 *
 * Returns nil when libxml2 yields no content, otherwise a Ruby string.
 * The buffer returned by xmlTextReaderReadInnerXml() is owned by the caller,
 * so it is released with xmlFree() once it has been turned into a Ruby
 * string (the original code leaked it on every call).
 */
static VALUE inner_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  xmlChar *value = xmlTextReaderReadInnerXml(reader);
  if (value == NULL) {
    return Qnil;
  }

  VALUE rb_value = NOKOGIRI_STR_NEW2(value);
  xmlFree(value);
  return rb_value;
}
lang() Show Source

Get the xml:lang scope within which the node resides.

/*
 * Get the xml:lang scope within which the current node resides.
 *
 * Returns nil when libxml2 reports no language, otherwise a Ruby string.
 */
static VALUE lang(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *xml_lang;

  Data_Get_Struct(self, xmlTextReader, reader);
  xml_lang = (const char *)xmlTextReaderConstXmlLang(reader);

  if (xml_lang != NULL) {
    return NOKOGIRI_STR_NEW2(xml_lang);
  }
  return Qnil;
}
local_name() Show Source

Get the local name of the node

/*
 * Get the local name of the current node.
 *
 * Returns nil when libxml2 reports no local name, otherwise a Ruby string.
 */
static VALUE local_name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *local;

  Data_Get_Struct(self, xmlTextReader, reader);
  local = (const char *)xmlTextReaderConstLocalName(reader);

  if (local != NULL) {
    return NOKOGIRI_STR_NEW2(local);
  }
  return Qnil;
}
name() Show Source

Get the name of the node

/*
 * Get the name of the current node.
 *
 * Returns nil when libxml2 reports no name, otherwise a Ruby string.
 */
static VALUE name(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *node_name;

  Data_Get_Struct(self, xmlTextReader, reader);
  node_name = (const char *)xmlTextReaderConstName(reader);

  if (node_name != NULL) {
    return NOKOGIRI_STR_NEW2(node_name);
  }
  return Qnil;
}
namespace_uri() Show Source

Get the URI defining the namespace associated with the node

/*
 * Get the URI defining the namespace associated with the current node.
 *
 * Returns nil when libxml2 reports no namespace URI, otherwise a Ruby string.
 */
static VALUE namespace_uri(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_uri;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_uri = (const char *)xmlTextReaderConstNamespaceUri(reader);

  if (ns_uri != NULL) {
    return NOKOGIRI_STR_NEW2(ns_uri);
  }
  return Qnil;
}
namespaces() Show Source

Get a hash of namespaces for this Node

/*
 * Get a hash of namespaces for the current node.
 *
 * Returns an empty hash when has_attributes() reports 0, nil when the
 * current node cannot be expanded, and otherwise the hash as filled in by
 * Nokogiri_xml_node_namespaces().
 */
static VALUE namespaces(VALUE self)
{
  xmlTextReaderPtr reader;
  VALUE ns_hash;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_hash = rb_hash_new();

  if (has_attributes(reader) == 0) {
    return ns_hash;
  }

  xmlNodePtr expanded = xmlTextReaderExpand(reader);
  if (!expanded) {
    return Qnil;
  }

  Nokogiri_xml_node_namespaces(expanded, ns_hash);
  return ns_hash;
}
node_type() Show Source

Get the type of the reader's current node

/*
 * Get the type of the reader's current node, as the Integer reported by
 * xmlTextReaderNodeType().
 */
static VALUE node_type(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  int type_code = xmlTextReaderNodeType(reader);

  return INT2NUM((long)type_code);
}
outer_xml() Show Source

Read the current node and its contents, including child nodes and markup.

/*
 * Read the current node and its contents, including child nodes and markup.
 *
 * Returns nil when libxml2 yields no content, otherwise a Ruby string.
 * The buffer returned by xmlTextReaderReadOuterXml() is owned by the caller,
 * so it is released with xmlFree() once it has been turned into a Ruby
 * string (the original code leaked it on every call).
 */
static VALUE outer_xml(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  xmlChar *value = xmlTextReaderReadOuterXml(reader);
  if (value == NULL) {
    return Qnil;
  }

  VALUE rb_value = NOKOGIRI_STR_NEW2(value);
  xmlFree(value);
  return rb_value;
}
prefix() Show Source

Get the shorthand reference to the namespace associated with the node.

/*
 * Get the shorthand reference (prefix) to the namespace associated with the
 * current node.
 *
 * Returns nil when libxml2 reports no prefix, otherwise a Ruby string.
 */
static VALUE prefix(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *ns_prefix;

  Data_Get_Struct(self, xmlTextReader, reader);
  ns_prefix = (const char *)xmlTextReaderConstPrefix(reader);

  if (ns_prefix != NULL) {
    return NOKOGIRI_STR_NEW2(ns_prefix);
  }
  return Qnil;
}
read() Show Source

Move the Reader forward through the XML document.

/*
 * Move the Reader forward through the XML document.
 *
 * Returns self when xmlTextReaderRead() reports 1 and nil when it reports 0.
 * For any other result an exception is raised: the wrapped libxml2 error
 * when one is available, otherwise a RuntimeError carrying the return code.
 */
static VALUE read_more(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  VALUE errors = rb_funcall(self, rb_intern("errors"), 0);

  /* Route structured errors into the errors array only while reading. */
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);
  int status = xmlTextReaderRead(reader);
  xmlSetStructuredErrorFunc(NULL, NULL);

  switch (status) {
  case 1:
    return self;
  case 0:
    return Qnil;
  default:
    break;
  }

  xmlErrorPtr last_error = xmlGetLastError();
  if (last_error == NULL) {
    rb_raise(rb_eRuntimeError, "Error pulling: %d", status);
  }
  rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, last_error));

  return Qnil;
}
state() Show Source

Get the state of the reader

/*
 * Get the state of the reader, as the Integer reported by
 * xmlTextReaderReadState().
 */
static VALUE state(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  int read_state = xmlTextReaderReadState(reader);

  return INT2NUM((long)read_state);
}
value() Show Source

Get the text value of the node if present

/*
 * Get the text value of the current node, if present.
 *
 * Returns nil when libxml2 reports no value, otherwise a Ruby string.
 */
static VALUE value(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *text;

  Data_Get_Struct(self, xmlTextReader, reader);
  text = (const char *)xmlTextReaderConstValue(reader);

  if (text != NULL) {
    return NOKOGIRI_STR_NEW2(text);
  }
  return Qnil;
}
value?() Show Source

Does this node have a text value?

/*
 * Does the current node have a text value?
 *
 * Maps the result of xmlTextReaderHasValue(): 1 -> true, 0 -> false,
 * anything else -> nil.
 */
static VALUE value_eh(VALUE self)
{
  xmlTextReaderPtr reader;
  Data_Get_Struct(self, xmlTextReader, reader);

  switch (xmlTextReaderHasValue(reader)) {
  case 0:
    return Qfalse;
  case 1:
    return Qtrue;
  default:
    return Qnil;
  }
}
xml_version() Show Source

Get the XML version of the document being read

/*
 * Get the XML version of the document being read.
 *
 * Returns nil when libxml2 reports no version, otherwise a Ruby string.
 */
static VALUE xml_version(VALUE self)
{
  xmlTextReaderPtr reader;
  const char *doc_version;

  Data_Get_Struct(self, xmlTextReader, reader);
  doc_version = (const char *)xmlTextReaderConstXmlVersion(reader);

  if (doc_version != NULL) {
    return NOKOGIRI_STR_NEW2(doc_version);
  }
  return Qnil;
}