Source code for temo.fit.data_loaders

import pandas
import numpy as np
import CoolProp.CoolProp as CP


[docs]
def only_the_fluids(df, identifier, identifiers):
    """ Keep only the rows whose identifier columns all hold permitted values

    Every column whose name starts with ``identifier`` acts as a filter: a row
    is retained only if its value in each of those columns is contained in
    ``identifiers``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to be filtered; it is not modified.
    identifier : str
        Prefix used to select the columns that are checked.
    identifiers : list-like
        Values that are allowed to appear in the selected columns.

    Returns
    -------
    pandas.DataFrame
        A copy of the rows that passed every filter.

    Raises
    ------
    ValueError
        If no column name starts with ``identifier``.
    """
    matched_columns = [name for name in df if name.startswith(identifier)]
    if not matched_columns:
        raise ValueError(f"Unable to match a single instance of your identifier root: {identifier}")

    # Apply the filters one after another; each pass narrows the previous result
    filtered = df
    for name in matched_columns:
        filtered = filtered[filtered[name].isin(identifiers)]
    return filtered.copy()



[docs]
def read_and_subset(path, identifier, identifiers, apply_skip, sep=','):
    """ Read a delimited data file and keep only the rows for the requested fluids

    Parameters
    ----------
    path : str
        Location of the file; lines starting with ``#`` are treated as comments.
    identifier : str
        Prefix of the column names that hold the fluid identifiers.
    identifiers : list-like
        Identifier values that a row must have in order to be kept.
    apply_skip : bool
        If true and a ``skip`` column is present, only rows whose ``skip``
        entry is empty (null) are kept.
    sep : str
        Field delimiter passed on to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        An independent frame that callers may freely add columns to.
    """
    df = pandas.read_csv(path, comment='#', sep=sep)
    df = only_the_fluids(df, identifier, identifiers)
    if apply_skip and 'skip' in df:
        # A boolean-mask selection is tracked by pandas as derived from its
        # parent, so a later column assignment by the caller would emit
        # SettingWithCopyWarning; hand back an explicit copy instead
        df = df[df['skip'].isnull()].copy()
    return df





[docs]
def load_SOS(dataroot, *, apply_skip=True, identifier, identifiers, output_csv=None, molar_masses, verbosity=1, sep=','):
    """ Loader for speed of sound data

    Reads ``SOS.csv`` from ``dataroot``, keeps the rows for the requested
    fluids, and adds two columns: ``M / kg/mol`` (mole-fraction weighted molar
    mass of the binary mixture) and ``p / Pa`` (pressure converted from
    whichever of the Pa, kPa, MPa or GPa columns is populated in each row).

    Parameters
    ----------
    dataroot : str
        Folder containing ``SOS.csv``.
    apply_skip : bool
        Forwarded to ``read_and_subset``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    output_csv : str, optional
        If given, the processed table is written to this path.
    molar_masses : sequence
        Molar masses of the first and second component, in that order.
    verbosity : int
        If greater than zero, the number of loaded rows is printed.
    sep : str
        Field delimiter of the file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain after filtering, or a row has no pressure value.
    KeyError
        If a required column is absent.
    """
    path = dataroot+'/SOS.csv'
    df = read_and_subset(path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the SOS and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    # Validate before touching any column so that a missing column is
    # reported with the descriptive message rather than a bare lookup error
    required_columns = ['T / K', 'Ao20', 'bibkey', 'z_1 / mole frac.']
    missing_columns = [col for col in required_columns if col not in df]
    if missing_columns:
        raise KeyError("Required column not found in SOS data: " + str(missing_columns))

    z_1 = df['z_1 / mole frac.']
    df['M / kg/mol'] = z_1*molar_masses[0] + (1-z_1)*molar_masses[1]

    # Multipliers that bring each supported pressure column to Pa; the order
    # fixes which column wins if more than one is populated in a row
    factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9}

    def get_p_Pa(row):
        for k, factor in factors.items():
            if k in row and not pandas.isnull(row[k]):
                return row[k]*factor
        raise ValueError("no pressure was specified; allowed values are:"+str(factors.keys()))

    df['p / Pa'] = df.apply(get_p_Pa, axis=1)

    # Written after all derived columns exist so the file matches the
    # returned table
    if output_csv is not None:
        df.to_csv(output_csv, index=False)
    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {path}")

    return df


def _density_processing(df, molar_masses=None):
    """ Validate and normalize the pressure and density columns of a data table

    For every ``bibkey`` group exactly one of the supported pressure columns
    and exactly one of the supported density columns must be populated in all
    rows of that group.  Afterwards the columns ``rho / mol/m^3`` and
    ``p / Pa`` are filled in for every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a ``bibkey`` column; the two derived columns are also
        written into this frame.
    molar_masses : sequence, optional
        Molar masses of the first and second component; needed only for rows
        that give the density in kg/m^3.

    Returns
    -------
    pandas.DataFrame
        A copy of the processed table.

    Raises
    ------
    ValueError
        If a group does not provide exactly one pressure or density column,
        if a row has no pressure, or if a mass density has to be converted
        and ``molar_masses`` was not given.
    """
    allowed_press_cols = ['p / Pa', 'p / kPa', 'p / MPa', 'p / GPa']
    allowed_density_cols = ['rho / kg/m^3', 'rho / mol/m^3']

    def fully_populated(frame, candidates):
        # Columns that exist in the frame and have no empty entries
        return [col for col in candidates if col in frame and frame[col].notnull().all()]

    for key, gp in df.groupby('bibkey'):

        # Pressure in Pa, kPa, MPa, or GPa
        provided_press_cols = fully_populated(gp, allowed_press_cols)
        if len(provided_press_cols) != 1:
            raise ValueError(f"One and only one of the pressure options {allowed_press_cols} must be"
            f" provided for bibkey of {key}. You provided: {provided_press_cols}")

        # Density either in kg/m^3 or mol/m^3
        provided_density_cols = fully_populated(gp, allowed_density_cols)
        if len(provided_density_cols) != 1:
            # These columns only decorate the message; tolerate their absence
            # so that the error below is the one the caller sees
            phases = gp['phases'].iloc[0] if 'phases' in gp else 'unknown'
            prop = gp['property'].iloc[0] if 'property' in gp else 'unknown'

            raise ValueError(f"One and only one of the density options [{allowed_density_cols}] must be"
            f" provided for bibkey of {key} w/ phases of {phases} and property of {prop}. You provided: {provided_density_cols}")

    def get_molar_density(row):
        if 'rho / mol/m^3' in row and not pandas.isnull(row['rho / mol/m^3']):
            return row['rho / mol/m^3']
        if molar_masses is None:
            raise ValueError("molar_masses must be provided to convert a density in kg/m^3 to mol/m^3")
        M = row['z_1 / mole frac.']*molar_masses[0] + row['z_2 / mole frac.']*molar_masses[1]
        return row['rho / kg/m^3']/M

    # Convert to molar density
    df['rho / mol/m^3'] = df.apply(get_molar_density, axis=1)

    # Multipliers that bring each supported pressure column to Pa
    factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9}

    def get_p_Pa(row):
        for k, factor in factors.items():
            if k in row and not pandas.isnull(row[k]):
                return row[k]*factor
        raise ValueError("no pressure was specified; allowed values are:"+str(factors.keys()))

    df['p / Pa'] = df.apply(get_p_Pa, axis=1)
    return df.copy()


[docs]
def load_PVT(dataroot, *, identifier, identifiers, apply_skip=True, output_csv=None, molar_masses, verbosity=1, sep=',', empty_permitted=True):
    """ Loader for p-v-T data

    Reads ``PVT.csv`` from ``dataroot``, keeps the rows for the requested
    fluids and, when any rows remain, passes them through
    ``_density_processing``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``PVT.csv``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    apply_skip : bool
        Forwarded to ``read_and_subset``.
    output_csv : str, optional
        If given, the resulting table is written to this path.
    molar_masses : sequence
        Forwarded to ``_density_processing``.
    verbosity : int
        If greater than zero, the number of loaded rows is printed.
    sep : str
        Field delimiter of the file.
    empty_permitted : bool
        If false, an empty result raises ``ValueError``; otherwise the empty
        table is returned without further processing.
    """
    source = dataroot+'/PVT.csv'
    df = read_and_subset(source, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)

    if df.empty:
        if not empty_permitted:
            raise ValueError(f"No rows remained after loading the PVT and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")
    else:
        df = _density_processing(df, molar_masses=molar_masses)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {source}")

    return df



[docs]
def load_PVT_P(dataroot, *, identifier, identifiers, apply_skip=True, output_csv=None, molar_masses, verbosity=1, sep=','):
    """ Loader for p-v-T data with pressure deviations

    Reads ``PVT_P.csv`` from ``dataroot``, keeps the rows for the requested
    fluids and passes them through ``_density_processing``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``PVT_P.csv``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    apply_skip : bool
        Accepted but currently not used: the file is always read with
        ``apply_skip=False``.
    output_csv : str, optional
        If given, the processed table is written to this path.
    molar_masses : sequence
        Forwarded to ``_density_processing``.
    verbosity : int
        If greater than zero, the number of loaded rows is printed.
    sep : str
        Field delimiter of the file.

    Raises
    ------
    ValueError
        If no rows remain after filtering.
    """
    source = dataroot+'/PVT_P.csv'
    # NOTE(review): the caller's apply_skip is ignored here and False is
    # passed instead -- confirm whether that is intentional
    df = read_and_subset(source, identifier=identifier, identifiers=identifiers, apply_skip=False, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the PVT_P and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    df = _density_processing(df, molar_masses=molar_masses)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {source}")

    return df



[docs]
def load_VLE(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """ Loader for VLE data

    Reads ``VLE.csv`` from ``dataroot``, keeps the rows for the requested
    fluids, checks that the composition columns needed by each ``kind`` of
    data point are present, drops rows whose composition is exactly 0 or 1 in
    those columns, and adds a ``p / Pa`` column.

    Parameters
    ----------
    dataroot : str
        Folder containing ``VLE.csv``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    apply_skip : bool
        Forwarded to ``read_and_subset``.
    output_csv : str, optional
        If given, the processed table is written to this path.
    verbosity : int
        If greater than zero, the number of loaded rows is printed.
    molar_masses
        Not used by this loader.
    sep : str
        Field delimiter of the file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain after the fluid filter, or a row has no pressure.
    KeyError
        If a required column is absent or a ``kind`` value is not one of
        PTXY, BUB or DEW.
    """
    path = dataroot+'/VLE.csv'
    df = read_and_subset(path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the VLE and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    required_columns = ['T / K', 'kind']
    missing_columns = [col for col in required_columns if col not in df]
    if missing_columns:
        raise KeyError("Required column not found in VLE data: " + str(missing_columns))

    # kind -> (label used in the error message, composition columns needed)
    composition_columns = {
        'PTXY': ('VLE', ['x_1 / mole frac.', 'y_1 / mole frac.']),
        'BUB': ('BUB', ['x_1 / mole frac.']),
        'DEW': ('DEW', ['y_1 / mole frac.']),
    }

    # The groups are taken from the table as loaded; the filtering inside the
    # loop rebinds df and therefore applies to rows of every kind
    for kind, gp in df.groupby('kind'):
        if kind not in composition_columns:
            raise KeyError(f'Provided kind of "{kind}" is not in the set of {{"PTXY","BUB","DEW"}}')
        label, needed = composition_columns[kind]
        missing_columns = [col for col in needed if col not in gp]
        if missing_columns:
            raise KeyError(f"Required column not found in {label} data: " + str(missing_columns))
        for col in needed:
            # Drop rows with a composition of exactly 0 or 1 (pure component)
            is_pure = (df[col] == 0.0) | (df[col] == 1.0)
            df = df[~is_pure]

    # Detach from the filtered selections so that adding a column below does
    # not emit SettingWithCopyWarning
    df = df.copy()

    # Multipliers that bring each supported pressure column to Pa; the order
    # fixes which column wins if more than one is populated in a row
    factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9}

    def get_p_Pa(row):
        for k, factor in factors.items():
            if k in row and not pandas.isnull(row[k]):
                return row[k]*factor
        raise ValueError("no pressure was specified; allowed values are: " + str(factors.keys()))

    # Built as a list so that a table left without rows by the filter above
    # still receives a (zero-length) column
    df['p / Pa'] = [get_p_Pa(row) for _, row in df.iterrows()]

    if output_csv is not None:
        df.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {path}")

    return df



[docs]
def load_CRIT(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """ Loader for critical point data

    Reads ``CRIT.csv`` from ``dataroot``, keeps the rows for the requested
    fluids and fills the ``rho / mol/m^3`` column, converting from
    ``rho / kg/m^3`` where only that is given.  Rows without any density get
    NaN.

    Parameters
    ----------
    dataroot : str
        Folder containing ``CRIT.csv``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    apply_skip : bool
        Forwarded to ``read_and_subset``.
    output_csv : str, optional
        If given, the processed table is written to this path.
    verbosity : int
        If greater than zero, the number of loaded rows is printed.
    molar_masses : sequence, optional
        Molar masses of the first and second component; used only for rows
        whose density is given in kg/m^3.
    sep : str
        Field delimiter of the file.

    Raises
    ------
    KeyError
        If the ``T / K`` or ``p / Pa`` column is absent.
    """
    source = dataroot+'/CRIT.csv'
    df = read_and_subset(source, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)

    missing_columns = [col for col in ['T / K', 'p / Pa'] if col not in df]
    if missing_columns:
        raise KeyError("Required column not found in CRIT data: " + str(missing_columns))

    def get_molar_density(row):
        # A molar density, when present, is used as is
        if 'rho / mol/m^3' in row and not pandas.isnull(row['rho / mol/m^3']):
            return row['rho / mol/m^3']
        # Otherwise convert a mass density with the mixture molar mass
        if 'rho / kg/m^3' in row and not pandas.isnull(row['rho / kg/m^3']):
            M = row['z_1 / mole frac.']*molar_masses[0] + row['z_2 / mole frac.']*molar_masses[1]
            return row['rho / kg/m^3']/M
        # No density was given for this row
        return np.nan

    # Convert to molar density
    df['rho / mol/m^3'] = df.apply(get_molar_density, axis=1)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {source}")

    return df


# Parse B12 data

[docs]
def load_B12(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """ Loader for the B12 data file

    Reads ``B12.csv`` from ``dataroot`` and keeps the rows for the requested
    fluids; no further processing is applied.

    Parameters
    ----------
    dataroot : str
        Folder containing ``B12.csv``.
    identifier, identifiers
        Column-name prefix and permitted values used to select the fluids.
    apply_skip : bool
        Forwarded to ``read_and_subset``.
    output_csv : str, optional
        If given, the table is written to this path.
    verbosity, molar_masses
        Accepted but not used by this loader.
    sep : str
        Field delimiter of the file.

    Returns
    -------
    pandas.DataFrame
        A copy of the filtered table.
    """
    source = dataroot+'/B12.csv'
    table = read_and_subset(source, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)

    if output_csv is not None:
        table.to_csv(output_csv, index=False)

    return table.copy()


if __name__ == '__main__':
    # Manual smoke run: load each data file from the NH3H2O folder for the
    # ammonia + water pair without writing any output files
    identifiers = ['AMMONIA', 'WATER']
    molar_masses = [CP.PropsSI('molemass', fluid) for fluid in identifiers]
    shared_kwargs = dict(identifier='FLD', identifiers=identifiers, molar_masses=molar_masses, output_csv=None)
    for loader in (load_PVT, load_SOS, load_VLE, load_CRIT):
        loader('NH3H2O', **shared_kwargs)