RDF processor

From: Peter F. Patel-Schneider (pfps@research.bell-labs.com)
Date: 10/18/01


As promised (threatened?) I put together an RDF(-ish) processor.  It uses
the announced-but-not-yet-distributed galax XML Query system.  (This is why
I'm not yet sending the processor out to rdf-interest.)

The processor (process file) takes the name of a file containing XML
fragments and produces an interpretation.  I believe that an XML document
would also work.

The file is first processed through Galax to produce an XQuery data model.
Then that data model is processed to produce an RDF-ish interpretation.  If
the input file consists of RDF fragments (the rdf:rdf wrapper is not
handled), then the interpretation should be equivalent to one of Pat
Hayes's RDF interpretations.  (I may even write a function to do the
conversion, but not quite yet.)

There is also a models function that determines if an interpretation is a
model for a XQuery data model and an entails function that implements
entailment in RDF (between two XQuery data models).

Warning:  This code has had very limited testing!

Peter F. Patel-Schneider



(* Copyright (c) 2001 Lucent Technologies.  All rights reserved. *)

module Set = (* very simple set *)
struct
  type 'a set = 'a list
  let empty = [ ]
  let member s i = List.exists (fun(m)-> i=m) s
  let add s i  = if member s i then s else i::s
end

module Map = (* a very simple map *)
struct 
  type ('a,'b) map = ('a * 'b) list
  let empty = []
  let find m a = 
      try Some (snd (List.find (fun(s,t)->s=a) m))
      with Not_found -> None
  let rec insert m a b = 
        match m with [] -> (a,b) :: m
        | (s,t) :: r -> if a=s then (a,b)::r
                      else (s,t) :: (insert r a b)
end

let split s = (* split at ':', very ugly due to caml strings *)
    if String.contains s ':'
    then let position = String.index s ':'
         in let remainder = (String.length s) - position - 1
         in let left = String.create position
         in let right = String.create remainder
         in let _ = String.blit s 0 left 0 position
         in let _ = String.blit s (position+1) right 0 remainder
	 in ( left, right )
    else ( "", s )

let image e r = match Map.find e r with None -> Set.empty | Some p -> p
let extend e s t = Map.insert e s (Set.add (image e s) t)

type resource = Namespace.rqname
type value = Typed of Datamodel._ValueNode
           | Untyped of string
type objct = Resource of resource
	   | Data of value

(* RDF interpretations *)
(* because nodes are resources, IS is identity is not needed *)
type interpretation = { r : resource Set.set;
			ext : ( resource , objct Set.set ) Map.map;
			cext : ( resource , resource Set.set ) Map.map }
let emptyIntr = { r = Set.empty; ext = Map.empty; cext = Map.empty }

let expand nsenv qname = (Resolve.resolve_qname nsenv qname)

let makeUQName prefix uri =
  if prefix="" then (Namespace.NSDefaultPrefix,uri)
  else ((Namespace.NSPrefix prefix),uri)

let rdfns = 
    (Namespace.add_ns
     (Namespace.add_ns
      (Namespace.add_ns (Namespace.empty_nsenv())
       (Namespace.NSPrefix "xml")
       (Namespace.NSUri "http://www.w3.org/XML/1998/namespace"))
      (Namespace.NSPrefix "rdf") (Namespace.NSUri "//rdf"))
     (Namespace.NSPrefix "xsi") (Namespace.NSUri "//xsi"))
let rdfid = expand rdfns (makeUQName "rdf" "ID")
let rdfabout = expand rdfns (makeUQName "rdf" "about")
let rdfresource = expand rdfns (makeUQName "rdf" "resource")
let rdftype = expand rdfns (makeUQName "rdf" "type")
let xsitype = expand rdfns (makeUQName "xsi" "type")

let attributeNamed nm ass = 
    let ok(n) = (Datamodel.attrName n) = nm
    in try Some ( List.find ok (Datamodel.list_of_oforest ass) )
       with Not_found -> None

let nspaceExtend nsenv nsnode = 
    let add ns nsn = Namespace.add_ns ns (Datamodel.nsPrefix nsn)
				         (Datamodel.nsUri nsn)
    in Datamodel.oforestFold_left add nsenv nsnode

let gensym = ref 0
let bid () =
    let _ = gensym := 1 + (!gensym)
    in (Namespace.NSUri(":"),(string_of_int (!gensym)))

let structural nm = 
    (snd rdfid) = nm  or  (snd rdfabout) = nm  or
    (snd rdfresource) = nm  or  (snd xsitype) = nm

let attrString nd = 
    Datamodel.oforestFold_left (fun s v -> (Datamodel.string_of_value v) ^ s)
           "" (Datamodel.attrValue nd) 

let attrValue nd typ = (* ** untyped for now *)
    Untyped ( attrString nd )

let valueValue nd typ = (* ** untyped for now *)
    Untyped( Datamodel.string_of_value nd)

let attrToResource nd nsenv = 
    let ( prefix, local )  = split (attrString nd)
    in snd(expand nsenv (makeUQName prefix local))

let id (n:Datamodel._ElemNode) nsenv = 
    match attributeNamed (Sym.rattr_symbol rdfid)
	  (Datamodel.elemAttributes n) with
      Some attr -> Some(attrToResource attr nsenv)
    | None -> match attributeNamed (Sym.rattr_symbol rdfabout)
		    (Datamodel.elemAttributes n) with
		Some attr -> Some(attrToResource attr nsenv)
	      | None -> None

(* these work only on trees for now *)

(* determine the resource and partial model for a node, dm,
   given the interpretation so far, i, the type from the parent, typ,
             and the namespace environment, nsenv *)
let canonicalIntr dm i nsenv =
  let rec cm n i typ nsenv =
      match i with {r=r; ext=ext; cext=cext} ->
      match Datamodel.getNodeKind n with
        Datamodel.DocNodeKind -> 
	  let pc i child = snd(cm child i None nsenv)
	  in (None,Datamodel.oforestFold_left pc i
	           (Datamodel.docChildren(Datamodel.getrDocNode(n)))) 
     | Datamodel.ElemNodeKind -> 
	 let n = Datamodel.getrElemNode n
	 in let nsenv = nspaceExtend nsenv (Datamodel.elemNamespaces n)
         in let er : resource =  match (id n nsenv) with None -> bid()
						 | Some id -> id
	 in let nr : resource = (Sym.relem_name (Datamodel.elemName n))
	 in let didme = { r=Set.add (Set.add r er) nr; ext=ext;
			     cext = extend cext nr er }
	 in let pchild i child =
	      match cm child i None (* ** element type *) nsenv with
		(None,i) -> i
	      | (Some rr,{r=r;ext=ext;cext=cext}) ->
		 match rr with
		   Resource rrr->
                     if Set.member (image cext (snd rdftype)) rrr
                     then let addClass cext c =
		               match c with Resource r -> extend cext r rrr
					  | _ -> cext
			  in {r=r; ext = extend ext er rr;
			      cext=List.fold_left addClass cext (image ext rrr)}
		     else {r=r; ext = extend ext er rr; cext=cext}
	        | _ -> {r=r; ext = extend ext er rr; cext=cext}
         in let pachild i achild = pchild i (Datamodel.buildAttrNode achild)
	 in let didattrs = Datamodel.oforestFold_left pachild didme
				       (Datamodel.elemAttributes(n))
	 in let didchildren = Datamodel.oforestFold_left pchild didattrs
					     (Datamodel.elemChildren(n))
	 in (Some(Resource(er)), didchildren) 
     | Datamodel.AttrNodeKind ->
	 let n = Datamodel.getrAttrNode n
	 in if (Sym.rattr_name (Datamodel.attrName n)) = (snd rdfresource)
	    then ( Some ( Resource ( attrToResource n nsenv ) ), i )
	    else if structural(Sym.rattr_name(Datamodel.attrName n))
	    then (None,i)
	    else let ar : resource = bid() (* attributes have no ID *)
		 in let nr : resource = (Sym.rattr_name (Datamodel.attrName n))
		 in let v = Data(attrValue n None (* ** attribute type *) )
		 in let r = Set.add (Set.add r ar) nr
		 in let ext = extend ext ar v
		 in let cext = extend cext nr ar
		 in ( Some (Resource(ar)), { r=r; cext=cext; ext=ext } ) 
     | Datamodel.ValueNodeKind ->
         ( Some(Data(valueValue (Datamodel.getrValueNode n) typ)), i )
     | _ -> (None,i)
  in snd(cm dm i None nsenv)

let canonical dm nsenv = canonicalIntr dm emptyIntr nsenv

let canonicalForest dm nsenv =
    Datamodel.oforestFold_left (fun i d -> canonicalIntr d i nsenv) emptyIntr dm

(* add in extra stuff to handle an explicit rdftype link *)
let augment i typ =
    match i with {r=r; ext=ext; cext=cext} -> 
    let node = bid()
    in (node, { r = Set.add r node; ext = extend ext node (Resource typ);
	        cext = extend cext (snd rdftype) node })

let checkValue d typ vnode = (* ** needs to be fixed up for typeing *)
    match d with
      (Resource _) -> false
    | Data(Untyped s) ->
      ( match typ with None -> s = (Datamodel.string_of_value vnode)
                     | Some _ -> false )
    | Data(Typed data) ->
      ( match typ with
	  None -> (Datamodel.value_equal data vnode) (* ** *)
       | Some typ -> (Datamodel.value_equal data vnode) (* ** *) )

let checkAttribute res (nd:Datamodel._AttrNode) i =
  match i with { r=r; ext=ext; cext=cext } ->
  (Set.member (image cext res) (Sym.rattr_name(Datamodel.attrName nd)))
  && let vnode = Datamodel.oforestHead (Datamodel.attrValue nd)
     in List.exists (fun(v) -> checkValue v None (* ** attribute type *) vnode)
		    (image ext res)

let rec modelAttributes pnt attrs i ns = 
  match i with { r=r; ext=ext; cext=cext } ->
  if Datamodel.oforestIsEmpty attrs then true
  else let a = Datamodel.oforestHead attrs
       in let attrs = Datamodel.oforestTail attrs
       in if (Sym.rattr_name (Datamodel.attrName a)) = (snd rdfresource)
	  then let possible = image ext pnt
	       and target = Resource ( attrToResource a ns )
	       in ( List.exists (fun(e)->e=target) possible ) &&
 	           ( modelAttributes pnt attrs i ns )
	  else if structural(Sym.rattr_name(Datamodel.attrName a))
	  then modelAttributes pnt attrs i ns
	  else let possible = image ext pnt
	       and ca(p) = match p with
			     Resource p -> checkAttribute p a i
	        	   | Data p -> false
	       in ( List.exists ca possible ) &&
		  ( modelAttributes pnt attrs i ns )

let rec checkElement resource nd i ns =
    match i with { r=r; ext=ext; cext=cext } ->
    (Set.member (image cext (Sym.relem_name (Datamodel.elemName nd)))
		resource) &&
    (modelAttributes resource (Datamodel.elemAttributes nd) i ns) &&
    (modelChildren resource None (* ** element type *)
		   (Datamodel.elemChildren nd) i ns)

and modelChildren pnt ptype children i ns =
    match i with { r=r; ext=ext; cext=cext } ->
    if Datamodel.oforestIsEmpty children then true
    else let c = Datamodel.oforestHead children
	 in let children = Datamodel.oforestTail children
	 in match Datamodel.getNodeKind c with
	      Datamodel.ElemNodeKind ->
	      let c = Datamodel.getrElemNode(c)
	      in let possible = image ext pnt
	      in let ce r  = ( match r with Resource r -> checkElement r c i ns
			                  | Data r -> false )
	      in if (List.exists ce possible) ||
		    if (Sym.relem_name (Datamodel.elemName c) = (snd rdftype))
		    then let possibletype =
			      image cext (Sym.relem_name (Datamodel.elemName c))
		         and cet(typ) = let (n,ei) = augment i typ
					in checkElement n c ei ns 
		         in List.exists cet possibletype
		    else false
		 then (modelChildren pnt ptype children i 
			    (nspaceExtend ns (Datamodel.elemNamespaces c)))
		 else false
	     | Datamodel.ValueNodeKind ->
		let possible = image ext pnt
		and ct(r) = checkValue r ptype (Datamodel.getrValueNode c)
		in List.exists ct possible 
	     | _ -> modelChildren pnt ptype children i ns

let models dm i ns =
    let rec rootmodels n i ns =
	match i with { r=r; ext=ext; cext=cext } ->
	match Datamodel.getNodeKind n with
	  Datamodel.DocNodeKind -> 
	   List.for_all (fun n -> rootmodels n i ns)
	        (Datamodel.list_of_oforest
		     (Datamodel.docChildren (Datamodel.getrDocNode n)))
	| Datamodel.ElemNodeKind -> 
	   let nd = Datamodel.getrElemNode(n)
	   in let nid = (id nd ns)
	   in (match nid with
		 Some id -> checkElement id nd i (Namespace.empty_nsenv())
	       | None -> List.exists (fun(res)->checkElement res nd i ns) r)
	 | _ -> true
   in rootmodels dm i ns


let entails d1 d2 = models d2 (canonical d1 rdfns) rdfns

let entailsForest d1 d2 = models d2 (canonicalForest d1 rdfns) rdfns

let read file ns = 
    let ast = Parseutil.parseXML file
    in Load.load_xml_forest ns ast

let process file =
    let ns = Namespace.add_ns rdfns
	      (Namespace.NSDefaultPrefix) (Namespace.NSUri file)
    in let dm = read file ns
    in ( dm, canonicalForest dm ns, ns )

let selfentails file =
    match process file with ( dm, i, ns) ->
    let mds = List.for_all (fun dm -> models dm i ns)
	        (Datamodel.list_of_oforest dm)
    in ( i, mds )


This archive was generated by hypermail 2.1.4 : 04/02/02 EST