@@ -120,12 +120,14 @@ class Base(Block):
120120
121121 # pylint: disable=too-many-arguments
def __init__(self, model=None, model_alias=None, online=False,
             tokenize=True, tag=True, parse=True, resegment=False,
             delete_nodes=False, **kwargs):
    """Create the UDPipe block object.

    All options are only stored on the instance here; no tool is created
    in the constructor (``_tool`` starts as ``None``).

    Args:
        model: stored as ``self.model`` (presumably the UDPipe model to
            load -- confirm against the ``tool`` property).
        model_alias: stored as ``self.model_alias`` (presumably a short
            name resolved to a model elsewhere -- confirm).
        online: stored as ``self.online`` (presumably selects an online
            service instead of a local model -- confirm).
        tokenize: read by ``process_document`` to decide whether
            to run the tokenize step.
        tag: read by ``process_document`` to decide whether
            to run the tag step.
        parse: read by ``process_document`` to decide whether
            to run the parse step.
        resegment: read by ``process_document``; when true, one input tree
            may be split into several trees placed in new bundles.
        delete_nodes: when true, ``process_document`` removes all children
            of each processed tree's root before running the tool.
        **kwargs: forwarded unchanged to the parent block constructor.
    """
    super().__init__(**kwargs)
    self.model, self.model_alias, self.online = model, model_alias, online
    # Placeholder for the tool instance; it is not built in the constructor.
    self._tool = None
    self.tokenize, self.tag, self.parse, self.resegment = tokenize, tag, parse, resegment
    self.delete_nodes = delete_nodes
129131
130132 @property
131133 def tool (self ):
@@ -146,16 +148,19 @@ def tool(self):
146148 return self ._tool
147149
148150 def process_document (self , doc ):
149- tok , tag , par = self .tokenize , self .tag , self .parse
151+ tok , tag , par , reseg = self .tokenize , self .tag , self .parse , self . resegment
150152 old_bundles = doc .bundles
151153 new_bundles = []
152154 for bundle in old_bundles :
153155 for tree in bundle :
154156 new_bundles .append (bundle )
155157 if self ._should_process_tree (tree ):
158+ if self .delete_nodes :
159+ for subroot in tree .children :
160+ subroot .remove ()
156161 if tok :
157- new_trees = self .tool .tokenize_tag_parse_tree (tree , resegment = self . resegment ,
158- tag = self . tag , parse = self . parse )
162+ new_trees = self .tool .tokenize_tag_parse_tree (tree , resegment = reseg ,
163+ tag = tag , parse = par )
159164 if self .resegment and len (new_trees ) > 1 :
160165 orig_bundle_id = bundle .bundle_id
161166 bundle .bundle_id = orig_bundle_id + '-1'
@@ -164,9 +169,9 @@ def process_document(self, doc):
164169 new_tree .zone = tree .zone
165170 new_bundle .add_tree (new_tree )
166171 new_bundles .append (new_bundle )
167- elif not tok and tag and par :
168- self .tool .tag_parse_tree (tree )
169- elif not tok and not tag and not par and self . resegment :
172+ elif not tok and not reseg and ( tag or par ) :
173+ self .tool .tag_parse_tree (tree , tag = tag , parse = par )
174+ elif not tok and reseg and not tag and not par :
170175 sentences = self .tool .segment_text (tree .text )
171176 if len (sentences ) > 1 :
172177 orig_bundle_id = bundle .bundle_id
@@ -178,7 +183,7 @@ def process_document(self, doc):
178183 new_tree .text = sentence
179184 new_bundles .append (new_bundle )
180185 else :
181- raise ValueError ("Unimplemented tokenize=%s tag=%s parse=%s" % ( tok , tag , par ) )
186+ raise ValueError (f "Unimplemented tokenize={ tok } tag={ tag } parse={ par } resegment= { reseg } " )
182187 doc .bundles = new_bundles
183188
184189'''
0 commit comments