From: Peter F. Patel-Schneider ([email protected])
Date: 10/18/01
As promised (threatened?) I put together an RDF(-ish) processor. It uses the announced-but-not-yet-distributed galax XML Query system. (This is why I'm not yet sending the processor out to rdf-interest.) The processor (process file) takes the name of a file containing XML fragments and produces an interpretation. I believe that an XML document would also work. The file is first processed through Galax to produce an XQuery data model. Then that data model is processed to produce an RDF-ish interpretation. If the input file consists of RDF fragments (the rdf:rdf wrapper is not handled), then the interpretation should be equivalent to one of Pat Hayes's RDF interpretations. (I may even write a function to do the conversion, but not quite yet.) There is also a models function that determines if an interpretation is a model for a XQuery data model and an entails function that implements entailment in RDF (between two XQuery data models). Warning: This code has had very limited testing! Peter F. Patel-Schneider (* Copyright (c) 2001 Lucent Technologies. All rights reserved. *) module Set = (* very simple set *) struct type 'a set = 'a list let empty = [ ] let member s i = List.exists (fun(m)-> i=m) s let add s i = if member s i then s else i::s end module Map = (* a very simple map *) struct type ('a,'b) map = ('a * 'b) list let empty = [] let find m a = try Some (snd (List.find (fun(s,t)->s=a) m)) with Not_found -> None let rec insert m a b = match m with [] -> (a,b) :: m | (s,t) :: r -> if a=s then (a,b)::r else (s,t) :: (insert r a b) end let split s = (* split at ':', very ugly due to caml strings *) if String.contains s ':' then let position = String.index s ':' in let remainder = (String.length s) - position - 1 in let left = String.create position in let right = String.create remainder in let _ = String.blit s 0 left 0 position in let _ = String.blit s (position+1) right 0 remainder in ( left, right ) else ( "", s ) let image e r = match Map.find e r with None -> Set.empty | Some p -> p let extend e s t = Map.insert e s (Set.add (image e s) t) type resource = Namespace.rqname type value = Typed of Datamodel._ValueNode | Untyped of string type objct = Resource of resource | Data of value (* RDF interpretations *) (* because nodes are resources, IS is identity is not needed *) type interpretation = { r : resource Set.set; ext : ( resource , objct Set.set ) Map.map; cext : ( resource , resource Set.set ) Map.map } let emptyIntr = { r = Set.empty; ext = Map.empty; cext = Map.empty } let expand nsenv qname = (Resolve.resolve_qname nsenv qname) let makeUQName prefix uri = if prefix="" then (Namespace.NSDefaultPrefix,uri) else ((Namespace.NSPrefix prefix),uri) let rdfns = (Namespace.add_ns (Namespace.add_ns (Namespace.add_ns (Namespace.empty_nsenv()) (Namespace.NSPrefix "xml") (Namespace.NSUri "http://www.w3.org/XML/1998/namespace")) (Namespace.NSPrefix "rdf") (Namespace.NSUri "//rdf")) (Namespace.NSPrefix "xsi") (Namespace.NSUri "//xsi")) let rdfid = expand rdfns (makeUQName "rdf" "ID") let rdfabout = expand rdfns (makeUQName "rdf" "about") let rdfresource = expand rdfns (makeUQName "rdf" "resource") let rdftype = expand rdfns (makeUQName "rdf" "type") let xsitype = expand rdfns (makeUQName "xsi" "type") let attributeNamed nm ass = let ok(n) = (Datamodel.attrName n) = nm in try Some ( List.find ok (Datamodel.list_of_oforest ass) ) with Not_found -> None let nspaceExtend nsenv nsnode = let add ns nsn = Namespace.add_ns ns (Datamodel.nsPrefix nsn) (Datamodel.nsUri nsn) in Datamodel.oforestFold_left add nsenv nsnode let gensym = ref 0 let bid () = let _ = gensym := 1 + (!gensym) in (Namespace.NSUri(":"),(string_of_int (!gensym))) let structural nm = (snd rdfid) = nm or (snd rdfabout) = nm or (snd rdfresource) = nm or (snd xsitype) = nm let attrString nd = Datamodel.oforestFold_left (fun s v -> (Datamodel.string_of_value v) ^ s) "" (Datamodel.attrValue nd) let attrValue nd typ = (* ** untyped for now *) Untyped ( attrString nd ) let valueValue nd typ = (* ** untyped for now *) Untyped( Datamodel.string_of_value nd) let attrToResource nd nsenv = let ( prefix, local ) = split (attrString nd) in snd(expand nsenv (makeUQName prefix local)) let id (n:Datamodel._ElemNode) nsenv = match attributeNamed (Sym.rattr_symbol rdfid) (Datamodel.elemAttributes n) with Some attr -> Some(attrToResource attr nsenv) | None -> match attributeNamed (Sym.rattr_symbol rdfabout) (Datamodel.elemAttributes n) with Some attr -> Some(attrToResource attr nsenv) | None -> None (* these work only on trees for now *) (* determine the resource and partial model for a node, dm, given the interpretation so far, i, the type from the parent, typ, and the namespace environment, nsenv *) let canonicalIntr dm i nsenv = let rec cm n i typ nsenv = match i with {r=r; ext=ext; cext=cext} -> match Datamodel.getNodeKind n with Datamodel.DocNodeKind -> let pc i child = snd(cm child i None nsenv) in (None,Datamodel.oforestFold_left pc i (Datamodel.docChildren(Datamodel.getrDocNode(n)))) | Datamodel.ElemNodeKind -> let n = Datamodel.getrElemNode n in let nsenv = nspaceExtend nsenv (Datamodel.elemNamespaces n) in let er : resource = match (id n nsenv) with None -> bid() | Some id -> id in let nr : resource = (Sym.relem_name (Datamodel.elemName n)) in let didme = { r=Set.add (Set.add r er) nr; ext=ext; cext = extend cext nr er } in let pchild i child = match cm child i None (* ** element type *) nsenv with (None,i) -> i | (Some rr,{r=r;ext=ext;cext=cext}) -> match rr with Resource rrr-> if Set.member (image cext (snd rdftype)) rrr then let addClass cext c = match c with Resource r -> extend cext r rrr | _ -> cext in {r=r; ext = extend ext er rr; cext=List.fold_left addClass cext (image ext rrr)} else {r=r; ext = extend ext er rr; cext=cext} | _ -> {r=r; ext = extend ext er rr; cext=cext} in let pachild i achild = pchild i (Datamodel.buildAttrNode achild) in let didattrs = Datamodel.oforestFold_left pachild didme (Datamodel.elemAttributes(n)) in let didchildren = Datamodel.oforestFold_left pchild didattrs (Datamodel.elemChildren(n)) in (Some(Resource(er)), didchildren) | Datamodel.AttrNodeKind -> let n = Datamodel.getrAttrNode n in if (Sym.rattr_name (Datamodel.attrName n)) = (snd rdfresource) then ( Some ( Resource ( attrToResource n nsenv ) ), i ) else if structural(Sym.rattr_name(Datamodel.attrName n)) then (None,i) else let ar : resource = bid() (* attributes have no ID *) in let nr : resource = (Sym.rattr_name (Datamodel.attrName n)) in let v = Data(attrValue n None (* ** attribute type *) ) in let r = Set.add (Set.add r ar) nr in let ext = extend ext ar v in let cext = extend cext nr ar in ( Some (Resource(ar)), { r=r; cext=cext; ext=ext } ) | Datamodel.ValueNodeKind -> ( Some(Data(valueValue (Datamodel.getrValueNode n) typ)), i ) | _ -> (None,i) in snd(cm dm i None nsenv) let canonical dm nsenv = canonicalIntr dm emptyIntr nsenv let canonicalForest dm nsenv = Datamodel.oforestFold_left (fun i d -> canonicalIntr d i nsenv) emptyIntr dm (* add in extra stuff to handle an explicit rdftype link *) let augment i typ = match i with {r=r; ext=ext; cext=cext} -> let node = bid() in (node, { r = Set.add r node; ext = extend ext node (Resource typ); cext = extend cext (snd rdftype) node }) let checkValue d typ vnode = (* ** needs to be fixed up for typeing *) match d with (Resource _) -> false | Data(Untyped s) -> ( match typ with None -> s = (Datamodel.string_of_value vnode) | Some _ -> false ) | Data(Typed data) -> ( match typ with None -> (Datamodel.value_equal data vnode) (* ** *) | Some typ -> (Datamodel.value_equal data vnode) (* ** *) ) let checkAttribute res (nd:Datamodel._AttrNode) i = match i with { r=r; ext=ext; cext=cext } -> (Set.member (image cext res) (Sym.rattr_name(Datamodel.attrName nd))) && let vnode = Datamodel.oforestHead (Datamodel.attrValue nd) in List.exists (fun(v) -> checkValue v None (* ** attribute type *) vnode) (image ext res) let rec modelAttributes pnt attrs i ns = match i with { r=r; ext=ext; cext=cext } -> if Datamodel.oforestIsEmpty attrs then true else let a = Datamodel.oforestHead attrs in let attrs = Datamodel.oforestTail attrs in if (Sym.rattr_name (Datamodel.attrName a)) = (snd rdfresource) then let possible = image ext pnt and target = Resource ( attrToResource a ns ) in ( List.exists (fun(e)->e=target) possible ) && ( modelAttributes pnt attrs i ns ) else if structural(Sym.rattr_name(Datamodel.attrName a)) then modelAttributes pnt attrs i ns else let possible = image ext pnt and ca(p) = match p with Resource p -> checkAttribute p a i | Data p -> false in ( List.exists ca possible ) && ( modelAttributes pnt attrs i ns ) let rec checkElement resource nd i ns = match i with { r=r; ext=ext; cext=cext } -> (Set.member (image cext (Sym.relem_name (Datamodel.elemName nd))) resource) && (modelAttributes resource (Datamodel.elemAttributes nd) i ns) && (modelChildren resource None (* ** element type *) (Datamodel.elemChildren nd) i ns) and modelChildren pnt ptype children i ns = match i with { r=r; ext=ext; cext=cext } -> if Datamodel.oforestIsEmpty children then true else let c = Datamodel.oforestHead children in let children = Datamodel.oforestTail children in match Datamodel.getNodeKind c with Datamodel.ElemNodeKind -> let c = Datamodel.getrElemNode(c) in let possible = image ext pnt in let ce r = ( match r with Resource r -> checkElement r c i ns | Data r -> false ) in if (List.exists ce possible) || if (Sym.relem_name (Datamodel.elemName c) = (snd rdftype)) then let possibletype = image cext (Sym.relem_name (Datamodel.elemName c)) and cet(typ) = let (n,ei) = augment i typ in checkElement n c ei ns in List.exists cet possibletype else false then (modelChildren pnt ptype children i (nspaceExtend ns (Datamodel.elemNamespaces c))) else false | Datamodel.ValueNodeKind -> let possible = image ext pnt and ct(r) = checkValue r ptype (Datamodel.getrValueNode c) in List.exists ct possible | _ -> modelChildren pnt ptype children i ns let models dm i ns = let rec rootmodels n i ns = match i with { r=r; ext=ext; cext=cext } -> match Datamodel.getNodeKind n with Datamodel.DocNodeKind -> List.for_all (fun n -> rootmodels n i ns) (Datamodel.list_of_oforest (Datamodel.docChildren (Datamodel.getrDocNode n))) | Datamodel.ElemNodeKind -> let nd = Datamodel.getrElemNode(n) in let nid = (id nd ns) in (match nid with Some id -> checkElement id nd i (Namespace.empty_nsenv()) | None -> List.exists (fun(res)->checkElement res nd i ns) r) | _ -> true in rootmodels dm i ns let entails d1 d2 = models d2 (canonical d1 rdfns) rdfns let entailsForest d1 d2 = models d2 (canonicalForest d1 rdfns) rdfns let read file ns = let ast = Parseutil.parseXML file in Load.load_xml_forest ns ast let process file = let ns = Namespace.add_ns rdfns (Namespace.NSDefaultPrefix) (Namespace.NSUri file) in let dm = read file ns in ( dm, canonicalForest dm ns, ns ) let selfentails file = match process file with ( dm, i, ns) -> let mds = List.for_all (fun dm -> models dm i ns) (Datamodel.list_of_oforest dm) in ( i, mds )
This archive was generated by hypermail 2.1.4 : 04/02/02 EST