Source code for preprocess.utils.pipeline

from collections import OrderedDict
from preprocess import basic, shallow

def pipeline(text: str, flow=None) -> str:
    """Run ``text`` through an ordered sequence of preprocessing techniques.

    Each name in ``flow`` is looked up first in ``basic.__techniques__`` and
    then in ``shallow.__techniques__``; the callables found are applied to
    the text one after another, each receiving the output of the previous
    one.

    Parameters
    ----------
    text:
        String to parse, generally a sentence.
    flow:
        Ordered iterable of technique names to apply. When ``None``, the
        default flow ``['replace_urls', 'abbreviations',
        'expand_contractions']`` is used. Names found in neither registry
        are reported on stdout and skipped. A name listed more than once is
        applied only once, at the position of its first occurrence.

    Returns
    -------
    str or bool
        The preprocessed text, or ``False`` if one of the techniques raised
        an exception while being applied (the failure is reported on
        stdout). With an empty ``flow`` the text is returned unchanged.
    """
    # TODO: make a matrix to restrict impossible sequences.
    DEFAULT_FLOW = ['replace_urls', 'abbreviations', 'expand_contractions']

    # If flow is not defined, fall back to the default flow.
    if flow is None:
        print('Runing default pipeline')
        flow = DEFAULT_FLOW

    # Resolve every requested name to its callable. Keying by name keeps the
    # first-occurrence order and collapses repeated names into one step.
    steps = OrderedDict()
    for step in flow:
        if step in basic.__techniques__:
            steps[step] = basic.__techniques__[step]
        elif step in shallow.__techniques__:
            steps[step] = shallow.__techniques__[step]
        else:
            print("the %s technique could not be found" % step)

    # TODO: check if the order is possible in the matrix of possible
    # sequences.

    # Apply the resolved techniques in order, feeding each one the output of
    # the previous one.
    for name, technique in steps.items():
        try:
            text = technique(text)
        except Exception:
            # Only ordinary errors are reported here; KeyboardInterrupt and
            # SystemExit are deliberately left to propagate.
            print("%s technique can not be concatenated" % name)
            return False
    return text