# Source code for clintk.text_parser.section_manager

"""
Module to manage sections found by parser
"""
import pandas as pd
from .parser_utils import main_parser


def main_splitter(df, columns):
    """ splits all the entries of df into one row per section

    Every row of `df` is parsed with `main_parser`; the returned section
    dictionary is then expanded by `splitter` into one record per section.
    One text is therefore split into as many rows as the sections it contains.

    Parameters
    ----------
    df : pd.DataFrame
        the columns 'report', 'patient_id', 'original_date' and 'cycle' are
        read for every row

    columns : list of str
        columns of the returned frame. The records built by `splitter` carry
        the keys 'patient_id', 'date', 'cycle', 'section' and 'text'; only
        the keys listed in `columns` are kept

    Returns
    -------
    pd.DataFrame
        one row per (report, section). The frame built for each report keeps
        its own 0-based index, so index labels repeat across reports; call
        `reset_index(drop=True)` on the result if unique labels are needed

    """
    # Seed with an empty frame so that the result always exposes `columns`,
    # even when `df` has no rows at all.
    frames = [pd.DataFrame(columns=columns)]
    for _, row in df.iterrows():
        # Both parts are coerced to str so that a non-string date (e.g. a
        # Timestamp) does not break the concatenation.
        # NOTE(review): the second argument of main_parser looks like a
        # report identifier -- confirm against parser_utils.
        label = str(row['patient_id']) + ' ' + str(row['original_date'])
        sections = main_parser(row['report'], label)
        records = splitter(row['patient_id'],
                           row['original_date'],
                           row['cycle'],
                           sections)
        frames.append(pd.DataFrame(records, columns=columns))

    # A single concat at the end avoids re-copying the accumulated rows at
    # every iteration, which made the loop quadratic in the number of rows.
    return pd.concat(frames)


def splitter(patient_id, date, cycle, report_dict):
    """ splits the report into the number of keys in report_dict

    Parameters
    ----------
    patient_id
        value copied as is under the key 'patient_id' of every record
    date
        value copied as is under the key 'date' of every record
    cycle
        value copied as is under the key 'cycle' of every record
    report_dict : dict
        maps a section name to the content of that section

    Returns
    -------
    list of dict
        one dictionary per entry of `report_dict`, in iteration order, with
        the keys 'patient_id', 'date', 'cycle', 'section' and 'text'.
        Empty list if `report_dict` is empty
    """
    return [{'patient_id': patient_id,
             'date': date,
             'cycle': cycle,
             'section': section,
             'text': text} for section, text in report_dict.items()]


def reduce_dic(dico, sections):
    """ merges the values of a dictionary into a single string

    @TODO find sections names using regex

    Parameters
    ----------
    dico : dict
        maps a section name to its content (str)

    sections : list of str
        name of the sections to keep as in `ReportsParser.sections`.
        If None or empty, all the sections are kept

    Returns
    -------
    str
        concatenated contents of sections, in the iteration order of `dico`.
        Every kept content is preceded by a single space, so a non-empty
        result starts with ' '. Empty string if nothing is kept

    """
    if sections:
        # sections is not None (nor empty): keep only the listed sections
        kept = (value for key, value in dico.items() if key in sections)
    else:
        # keep all the sections
        kept = dico.values()

    # join once instead of growing the string with repeated `+=`
    return ''.join(' ' + value for value in kept)