1-
1+ import copy
22import hashlib
3- from pyld . nquads import parse_nquads , serialize_nquad
3+
44from pyld .identifier_issuer import IdentifierIssuer
5- import copy
5+ from pyld . nquads import parse_nquads , serialize_nquad
66
77
8- class URDNA2015 ( object ) :
8+ class URDNA2015 :
99 """
1010 URDNA2015 implements the URDNA2015 RDF Dataset Normalization Algorithm.
1111 """
@@ -20,11 +20,11 @@ def __init__(self):
2020 # 4.4) Normalization Algorithm
2121 def main (self , dataset , options ):
2222 # handle invalid output format
23- if 'format' in options :
24- if ( options ['format' ] != 'application/n-quads' and
25- options ['format' ] != 'application/nquads' ):
26- raise UnknownFormatError (
27- 'Unknown output format.' , options ['format' ])
23+ if 'format' in options and (
24+ options ['format' ] != 'application/n-quads'
25+ and options ['format' ] != 'application/nquads'
26+ ):
27+ raise UnknownFormatError ( 'Unknown output format.' , options ['format' ])
2828
2929 # 1) Create the normalization state.
3030
@@ -49,8 +49,9 @@ def main(self, dataset, options):
4949 if key == 'predicate' or component ['type' ] != 'blank node' :
5050 continue
5151 id_ = component ['value' ]
52- self .blank_node_info .setdefault (
53- id_ , {'quads' : []})['quads' ].append (quad )
52+ self .blank_node_info .setdefault (id_ , {'quads' : []})['quads' ].append (
53+ quad
54+ )
5455
5556 # 3) Create a list of non-normalized blank node identifiers and
5657 # populate it using the keys from the blank node to quads map.
@@ -105,7 +106,7 @@ def main(self, dataset, options):
105106
106107 # 6) For each hash to identifier list mapping in hash to blank nodes
107108 # map, lexicographically-sorted by hash:
108- for hash , id_list in sorted (self .hash_to_blank_nodes .items ()):
109+ for _hash , id_list in sorted (self .hash_to_blank_nodes .items ()):
109110 # 6.1) Create hash path list where each item will be a result of
110111 # running the Hash N-Degree Quads algorithm.
111112 hash_path_list = []
@@ -157,11 +158,12 @@ def main(self, dataset, options):
157158 for key , component in quad .items ():
158159 if key == 'predicate' :
159160 continue
160- if ( component ['type' ] == 'blank node' and not
161- component [ 'value' ]. startswith (
162- self .canonical_issuer .prefix ) ):
161+ if component ['type' ] == 'blank node' and not component [
162+ 'value'
163+ ]. startswith ( self .canonical_issuer .prefix ):
163164 component ['value' ] = self .canonical_issuer .get_id (
164- component ['value' ])
165+ component ['value' ]
166+ )
165167
166168 # 7.2) Add quad copy to the normalized dataset.
167169 normalized .append (serialize_nquad (quad ))
@@ -170,8 +172,10 @@ def main(self, dataset, options):
170172 normalized .sort ()
171173
172174 # 8) Return the normalized dataset.
173- if (options .get ('format' ) == 'application/n-quads' or
174- options .get ('format' ) == 'application/nquads' ):
175+ if (
176+ options .get ('format' ) == 'application/n-quads'
177+ or options .get ('format' ) == 'application/nquads'
178+ ):
175179 return '' .join (normalized )
176180 return parse_nquads ('' .join (normalized ))
177181
@@ -206,8 +210,7 @@ def hash_first_degree_quads(self, id_):
206210 # matches the reference blank node identifier then use the
207211 # blank node identifier _:a, otherwise, use the blank node
208212 # identifier _:z.
209- copy [key ] = self .modify_first_degree_component (
210- id_ , component , key )
213+ copy [key ] = self .modify_first_degree_component (id_ , component , key )
211214 nquads .append (serialize_nquad (copy ))
212215
213216 # 4) Sort nquads in lexicographical order.
@@ -301,7 +304,7 @@ def hash_n_degree_quads(self, id_, issuer):
301304 for related in permutation :
302305 # 5.4.4.1) If a canonical identifier has been issued for
303306 # related, append it to path.
304- if ( self .canonical_issuer .has_id (related ) ):
307+ if self .canonical_issuer .has_id (related ):
305308 path += self .canonical_issuer .get_id (related )
306309 # 5.4.4.2) Otherwise:
307310 else :
@@ -320,9 +323,11 @@ def hash_n_degree_quads(self, id_, issuer):
320323 # path is greater than or equal to the length of chosen
321324 # path and path is lexicographically greater than chosen
322325 # path, then skip to the next permutation.
323- if (len (chosen_path ) != 0 and
324- len (path ) >= len (chosen_path ) and
325- path > chosen_path ):
326+ if (
327+ len (chosen_path ) != 0
328+ and len (path ) >= len (chosen_path )
329+ and path > chosen_path
330+ ):
326331 skip_to_next_permutation = True
327332 break
328333
@@ -352,9 +357,11 @@ def hash_n_degree_quads(self, id_, issuer):
352357 # path is greater than or equal to the length of chosen
353358 # path and path is lexicographically greater than chosen
354359 # path, then skip to the next permutation.
355- if (len (chosen_path ) != 0 and
356- len (path ) >= len (chosen_path ) and
357- path > chosen_path ):
360+ if (
361+ len (chosen_path ) != 0
362+ and len (path ) >= len (chosen_path )
363+ and path > chosen_path
364+ ):
358365 skip_to_next_permutation = True
359366 break
360367
@@ -394,9 +401,11 @@ def create_hash_to_related(self, id_, issuer):
394401 # object, and graph name and it is a blank node that is not
395402 # identified by identifier:
396403 for key , component in quad .items ():
397- if (key != 'predicate' and
398- component ['type' ] == 'blank node' and
399- component ['value' ] != id_ ):
404+ if (
405+ key != 'predicate'
406+ and component ['type' ] == 'blank node'
407+ and component ['value' ] != id_
408+ ):
400409 # 3.1.1) Set hash to the result of the Hash Related Blank
401410 # Node algorithm, passing the blank node identifier for
402411 # component as related, quad, path identifier issuer as
@@ -405,8 +414,7 @@ def create_hash_to_related(self, id_, issuer):
405414 # respectively.
406415 related = component ['value' ]
407416 position = self .POSITIONS [key ]
408- hash = self .hash_related_blank_node (
409- related , quad , issuer , position )
417+ hash = self .hash_related_blank_node (related , quad , issuer , position )
410418
411419 # 3.1.2) Add a mapping of hash to the blank node identifier
412420 # for component to hash to related blank nodes map, adding
@@ -467,17 +475,21 @@ def create_hash_to_related(self, id_, issuer):
467475 # algorithm, passing the blank node identifier for subject as
468476 # related, quad, path identifier issuer as issuer, and p as
469477 # position.
470- if (quad ['subject' ]['type' ] == 'blank node' and
471- quad ['subject' ]['value' ] != id_ ):
478+ if (
479+ quad ['subject' ]['type' ] == 'blank node'
480+ and quad ['subject' ]['value' ] != id_
481+ ):
472482 related = quad ['subject' ]['value' ]
473483 position = 'p'
474484 # 3.2) Otherwise, if quad's object is a blank node that does
475485 # not match identifier, set hash to the result of the Hash Related Blank
476486 # Node algorithm, passing the blank node identifier for object
477487 # as related, quad, path identifier issuer as issuer, and r
478488 # as position.
479- elif (quad ['object' ]['type' ] == 'blank node' and
480- quad ['object' ]['value' ] != id_ ):
489+ elif (
490+ quad ['object' ]['type' ] == 'blank node'
491+ and quad ['object' ]['value' ] != id_
492+ ):
481493 related = quad ['object' ]['value' ]
482494 position = 'r'
483495 # 3.3) Otherwise, continue to the next quad.
@@ -487,8 +499,7 @@ def create_hash_to_related(self, id_, issuer):
487499 # 3.4) Add a mapping of hash to the blank node identifier for the
488500 # component that matched (subject or object) to hash to related
489501 # blank nodes map, adding an entry as necessary.
490- hash = self .hash_related_blank_node (
491- related , quad , issuer , position )
502+ hash = self .hash_related_blank_node (related , quad , issuer , position )
492503 hash_to_related .setdefault (hash , []).append (related )
493504
494505 return hash_to_related
@@ -525,9 +536,10 @@ def permutations(elements):
525536 for i in range (length ):
526537 e = elements [i ]
527538 is_left = left [e ]
528- if ((k is None or e > k ) and
529- ((is_left and i > 0 and e > elements [i - 1 ]) or
530- (not is_left and i < last and e > elements [i + 1 ]))):
539+ if (k is None or e > k ) and (
540+ (is_left and i > 0 and e > elements [i - 1 ])
541+ or (not is_left and i < last and e > elements [i + 1 ])
542+ ):
531543 k , pos = e , i
532544
533545 # no more permutations
@@ -551,4 +563,4 @@ class UnknownFormatError(ValueError):
551563
552564 def __init__ (self , message , format ):
553565 Exception .__init__ (self , message )
554- self .format = format
566+ self .format = format
0 commit comments