class Nokogiri::XML::Schema

Nokogiri::XML::Schema is used for validating XML against a schema (usually from an xsd file).

Synopsis

Validate an XML document against a Schema. Loop over the errors that are returned and print them out:

xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
doc = Nokogiri::XML(File.read(PO_XML_FILE))

xsd.validate(doc).each do |error|
  puts error.message
end

The list of errors are Nokogiri::XML::SyntaxError objects.

NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities are not resolved from the network (‘http://` or `ftp://`). Previously, parsing treated documents as “trusted” by default which was counter to Nokogiri’s “untrusted by default” security policy. If a document is trusted, then the caller may turn off the NONET option via the ParseOptions to re-enable external entity resolution over a network connection.

Attributes

errors[RW]

Errors while parsing the schema file

parse_options[RW]

The Nokogiri::XML::ParseOptions used to parse the schema

Public Class Methods

from_document(doc) click to toggle source

Create a new Schema from the Nokogiri::XML::Document doc

static VALUE
from_document(int argc, VALUE *argv, VALUE klass)
{
  VALUE document;
  VALUE parse_options;
  int parse_options_int;
  xmlDocPtr doc;
  xmlSchemaParserCtxtPtr ctx;
  xmlSchemaPtr schema;
  VALUE errors;
  VALUE rb_schema;
  int scanned_args = 0;
  xmlExternalEntityLoader old_loader = 0;

  scanned_args = rb_scan_args(argc, argv, "11", &document, &parse_options);

  Data_Get_Struct(document, xmlDoc, doc);
  doc = doc->doc; /* In case someone passes us a node. ugh. */

  if (scanned_args == 1) {
    parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
  }
  parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));

  if (has_blank_nodes_p(DOC_NODE_CACHE(doc))) {
    rb_raise(rb_eArgError, "Creating a schema from a document that has blank nodes exposed to Ruby is dangerous");
  }

  ctx = xmlSchemaNewDocParserCtxt(doc);

  errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
  xmlSchemaSetParserStructuredErrors(
    ctx,
    Nokogiri_error_array_pusher,
    (void *)errors
  );
#endif

  if (parse_options_int & XML_PARSE_NONET) {
    old_loader = xmlGetExternalEntityLoader();
    xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
  }

  schema = xmlSchemaParse(ctx);

  if (old_loader) {
    xmlSetExternalEntityLoader(old_loader);
  }

  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSchemaFreeParserCtxt(ctx);

  if (NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if (error) {
      Nokogiri_error_raise(NULL, error);
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);
  rb_iv_set(rb_schema, "@parse_options", parse_options);

  return rb_schema;

  return Qnil;
}
new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA) click to toggle source

Create a new Nokogiri::XML::Schema object using a string_or_io object.

# File lib/nokogiri/xml/schema.rb, line 46
def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
  from_document(Nokogiri::XML(string_or_io), options)
end
read_memory(string) click to toggle source

Create a new Schema from the contents of string

static VALUE
read_memory(int argc, VALUE *argv, VALUE klass)
{
  VALUE content;
  VALUE parse_options;
  int parse_options_int;
  xmlSchemaParserCtxtPtr ctx;
  xmlSchemaPtr schema;
  VALUE errors;
  VALUE rb_schema;
  int scanned_args = 0;
  xmlExternalEntityLoader old_loader = 0;

  scanned_args = rb_scan_args(argc, argv, "11", &content, &parse_options);
  if (scanned_args == 1) {
    parse_options = rb_const_get_at(rb_const_get_at(mNokogiriXml, rb_intern("ParseOptions")), rb_intern("DEFAULT_SCHEMA"));
  }
  parse_options_int = (int)NUM2INT(rb_funcall(parse_options, rb_intern("to_i"), 0));

  ctx = xmlSchemaNewMemParserCtxt((const char *)StringValuePtr(content), (int)RSTRING_LEN(content));

  errors = rb_ary_new();
  xmlSetStructuredErrorFunc((void *)errors, Nokogiri_error_array_pusher);

#ifdef HAVE_XMLSCHEMASETPARSERSTRUCTUREDERRORS
  xmlSchemaSetParserStructuredErrors(
    ctx,
    Nokogiri_error_array_pusher,
    (void *)errors
  );
#endif

  if (parse_options_int & XML_PARSE_NONET) {
    old_loader = xmlGetExternalEntityLoader();
    xmlSetExternalEntityLoader(xmlNoNetExternalEntityLoader);
  }

  schema = xmlSchemaParse(ctx);

  if (old_loader) {
    xmlSetExternalEntityLoader(old_loader);
  }

  xmlSetStructuredErrorFunc(NULL, NULL);
  xmlSchemaFreeParserCtxt(ctx);

  if (NULL == schema) {
    xmlErrorPtr error = xmlGetLastError();
    if (error) {
      Nokogiri_error_raise(NULL, error);
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  rb_schema = Data_Wrap_Struct(klass, 0, dealloc, schema);
  rb_iv_set(rb_schema, "@errors", errors);
  rb_iv_set(rb_schema, "@parse_options", parse_options);

  return rb_schema;
}

Public Instance Methods

valid?(thing) click to toggle source

Returns true if thing is a valid Nokogiri::XML::Document or file.

# File lib/nokogiri/xml/schema.rb, line 68
def valid?(thing)
  validate(thing).empty?
end
validate(thing) click to toggle source

Validate thing against this schema. thing can be a Nokogiri::XML::Document object, or a filename. An Array of Nokogiri::XML::SyntaxError objects found while validating the thing is returned.

# File lib/nokogiri/xml/schema.rb, line 55
def validate(thing)
  if thing.is_a?(Nokogiri::XML::Document)
    validate_document(thing)
  elsif File.file?(thing)
    validate_file(thing)
  else
    raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file"
  end
end