# Source code for temo.fit.data_loaders

import pandas
import numpy as np
import CoolProp.CoolProp as CP

def only_the_fluids(df, identifier, identifiers):
    """
    Keep only the rows whose identifier columns all hold one of the wanted values.

    Every column whose name starts with ``identifier`` is treated as an
    identifier column; a row survives only if its value in each of those
    columns is contained in ``identifiers``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to filter; it is not modified.
    identifier : str
        Prefix ("root") of the identifier column names.
    identifiers : list-like
        Values permitted in the identifier columns.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the surviving rows.

    Raises
    ------
    ValueError
        If no column name starts with ``identifier``.
    """
    matching_columns = [name for name in df if name.startswith(identifier)]
    if not matching_columns:
        raise ValueError(f"Unable to match a single instance of your identifier root: {identifier}")

    subset = df
    for name in matching_columns:
        # Successive filters are cumulative: a row must pass every identifier column
        subset = subset[subset[name].isin(identifiers)]
    return subset.copy()
def read_and_subset(path, identifier, identifiers, apply_skip, sep=','):
    """
    Read a CSV data file and keep only the rows for the requested identifiers.

    Parameters
    ----------
    path : str
        Location of the CSV file; lines starting with ``#`` are treated as comments.
    identifier : str
        Prefix of the identifier column names (forwarded to ``only_the_fluids``).
    identifiers : list-like
        Values permitted in the identifier columns.
    apply_skip : bool
        If true and the file has a ``skip`` column, drop every row whose
        ``skip`` cell is not empty.
    sep : str
        Column separator passed to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The filtered rows, as a frame that does not share data with an
        intermediate selection.

    Raises
    ------
    ValueError
        Propagated from ``only_the_fluids`` when no identifier column matches.
    """
    df = pandas.read_csv(path, comment='#', sep=sep)
    df = only_the_fluids(df, identifier, identifiers)
    if apply_skip and 'skip' in df:
        # Boolean indexing yields a selection of the parent frame; take a copy so
        # that callers adding columns afterwards are not writing into a slice
        # (avoids SettingWithCopyWarning on classic pandas).
        df = df[pandas.isnull(df['skip'])].copy()
    return df
def load_SOS(dataroot, *, apply_skip=True, identifier, identifiers, output_csv=None, molar_masses, verbosity=1, sep=','):
    """
    Loader for speed of sound data

    Reads ``<dataroot>/SOS.csv``, keeps the rows for the requested identifiers,
    adds a mixture molar mass column ``M / kg/mol`` and a pressure column
    ``p / Pa``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``SOS.csv``.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
    identifier : str
        Prefix of the identifier column names.
    identifiers : list-like
        Values permitted in the identifier columns.
    output_csv : str or None
        If given, the fully processed table is written to this path.
    molar_masses : sequence
        Two molar masses; element 0 is weighted by ``z_1`` and element 1 by
        ``1 - z_1``. Presumably in kg/mol given the output column name --
        confirm against the caller.
    verbosity : int
        Print a one-line summary when greater than zero.
    sep : str
        Column separator of the CSV file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain after filtering, or a row has no pressure in any of
        the supported unit columns.
    KeyError
        If a required column is missing.
    """
    path = dataroot+'/SOS.csv'
    df = read_and_subset(path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the SOS and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    required_columns = ['T / K', 'Ao20', 'bibkey']
    missing_columns = [col for col in required_columns if col not in df]
    if missing_columns:
        raise KeyError("Required column not found in SOS data: " + str(missing_columns))

    # Work on an independent frame so the column assignments below never
    # target a slice of another frame
    df = df.copy()

    # Mole-fraction weighted molar mass of the binary mixture
    z_1 = df['z_1 / mole frac.']
    df['M / kg/mol'] = z_1*molar_masses[0] + (1-z_1)*molar_masses[1]

    def get_p_Pa(row):
        """ Return the pressure of one row in Pa, from whichever unit column is filled """
        if 'p / Pa' in row and not pandas.isnull(row['p / Pa']):
            return row['p / Pa']
        factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9}
        for k, factor in factors.items():
            if k in row and not pandas.isnull(row[k]):
                return row[k]*factor
        raise ValueError("no pressure was specified; allowed values are:"+str(factors.keys()))
    df['p / Pa'] = df.apply(get_p_Pa, axis=1)

    # Export and report only once processing is complete, so the CSV contains
    # the normalised pressure column (consistent with the other loaders)
    if output_csv is not None:
        df.to_csv(output_csv, index=False)
    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {path}")
    return df
def _density_processing(df, molar_masses=None): allowed_press_cols = ['p / Pa', 'p / kPa', 'p / MPa', 'p / GPa'] allowed_density_cols = ['rho / kg/m^3', 'rho / mol/m^3'] for key, gp in df.groupby('bibkey'): # Pressure in Pa, kPa, MPa, or GPa provided_press_cols = [col for col in allowed_press_cols if (col in gp and all(~pandas.isnull(gp[col])))] if len(provided_press_cols) != 1: raise ValueError(f"One and only one of the pressure options {allowed_press_cols} must be" f" provided for bibkey of {key}. You provided: {provided_press_cols}") # Density either in kg/m^3 or mol/m^3 provided_density_cols = [col for col in allowed_density_cols if (col in gp and all(~pandas.isnull(gp[col])))] if len(provided_density_cols) != 1: phases = gp.phases.iloc[0] property = gp.property.iloc[0] raise ValueError(f"One and only one of the density options [{allowed_density_cols}] must be" f" provided for bibkey of {key} w/ phases of {phases} and property of {property}. You provided: {provided_density_cols}") def get_molar_density(row): if 'rho / mol/m^3' in row and not pandas.isnull(row['rho / mol/m^3']): return row['rho / mol/m^3'] else: M = row['z_1 / mole frac.']*molar_masses[0] + row['z_2 / mole frac.']*molar_masses[1] return row['rho / kg/m^3']/M # Convert to molar density df['rho / mol/m^3'] = df.apply(get_molar_density, axis=1) def get_p_Pa(row): if 'p / Pa' in row and not pandas.isnull(row['p / Pa']): return row['p / Pa'] else: factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9} for k, factor in factors.items(): if k in row and not pandas.isnull(row[k]): return row[k]*factor raise ValueError("no pressure was specified; allowed values are:"+str(factors.keys())) df['p / Pa'] = df.apply(get_p_Pa, axis=1) return df.copy()
def load_PVT(dataroot, *, identifier, identifiers, apply_skip=True, output_csv=None, molar_masses, verbosity=1, sep=',', empty_permitted=True):
    """
    Loader for p-v-T data

    Reads ``<dataroot>/PVT.csv``, keeps the rows for the requested
    identifiers and, when any rows remain, passes them through
    ``_density_processing``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``PVT.csv``.
    identifier, identifiers
        Identifier column prefix and the values permitted in those columns.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
    output_csv : str or None
        If given, the resulting table is written to this path.
    molar_masses : sequence
        Forwarded to ``_density_processing``.
    verbosity : int
        Print a one-line summary when greater than zero.
    sep : str
        Column separator of the CSV file.
    empty_permitted : bool
        When false, an empty result raises instead of being returned.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain and ``empty_permitted`` is false.
    """
    csv_path = dataroot+'/PVT.csv'
    data = read_and_subset(csv_path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)

    if data.empty:
        if not empty_permitted:
            raise ValueError(f"No rows remained after loading the PVT and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")
    else:
        # Unit normalisation is only attempted when there is something to process
        data = _density_processing(data, molar_masses=molar_masses)

    if output_csv is not None:
        data.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(data)} rows from {csv_path}")

    return data
def load_PVT_P(dataroot, *, identifier, identifiers, apply_skip=True, output_csv=None, molar_masses, verbosity=1, sep=','):
    """
    Loader for p-v-T data with pressure deviations

    Reads ``<dataroot>/PVT_P.csv``, keeps the rows for the requested
    identifiers and passes them through ``_density_processing``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``PVT_P.csv``.
    identifier, identifiers
        Identifier column prefix and the values permitted in those columns.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
        Pass ``apply_skip=False`` to keep flagged rows.
    output_csv : str or None
        If given, the processed table is written to this path.
    molar_masses : sequence
        Forwarded to ``_density_processing``.
    verbosity : int
        Print a one-line summary when greater than zero.
    sep : str
        Column separator of the CSV file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain after filtering.
    """
    path = dataroot+'/PVT_P.csv'
    # Forward the caller's apply_skip; it was previously ignored because the
    # call hard-coded apply_skip=False
    df = read_and_subset(path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the PVT_P and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    df = _density_processing(df, molar_masses=molar_masses)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)
    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {path}")
    return df
def load_VLE(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """
    Loader for VLE data

    Reads ``<dataroot>/VLE.csv``, keeps the rows for the requested
    identifiers, removes rows whose relevant mole fraction is exactly 0 or 1,
    and adds a pressure column ``p / Pa``.

    Parameters
    ----------
    dataroot : str
        Folder containing ``VLE.csv``.
    identifier, identifiers
        Identifier column prefix and the values permitted in those columns.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
    output_csv : str or None
        If given, the processed table is written to this path.
    verbosity : int
        Print a one-line summary when greater than zero.
    molar_masses
        Accepted for signature parity with the other loaders; not used here.
    sep : str
        Column separator of the CSV file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If no rows remain after filtering or after removing pure-fluid rows,
        or a row has no pressure in any of the supported unit columns.
    KeyError
        If a required column is missing or a ``kind`` value is not one of
        PTXY, BUB, DEW.
    """
    path = dataroot+'/VLE.csv'
    df = read_and_subset(path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)
    if df.empty:
        raise ValueError(f"No rows remained after loading the VLE and applying filter:: {{identifier: {identifier}, identifiers: {identifiers}}}")

    required_columns = ['T / K', 'kind']
    missing_columns = [col for col in required_columns if col not in df]
    if missing_columns:
        raise KeyError("Required column not found in VLE data: " + str(missing_columns))

    # kind -> (label used in the error message, composition columns it requires)
    columns_by_kind = {
        'PTXY': ('VLE', ['x_1 / mole frac.', 'y_1 / mole frac.']),
        'BUB': ('BUB', ['x_1 / mole frac.']),
        'DEW': ('DEW', ['y_1 / mole frac.']),
    }

    def force_skip_pure(frame, key):
        """ Drop the rows whose value in column ``key`` is exactly 0 or 1 """
        return frame[(frame[key] != 0.0) & (frame[key] != 1.0)]

    for kind, _ in df.groupby('kind'):
        if kind not in columns_by_kind:
            raise KeyError(f'Provided kind of "{kind}" is not in the set of {{"PTXY","BUB","DEW"}}')
        label, needed_columns = columns_by_kind[kind]
        missing_columns = [col for col in needed_columns if col not in df]
        if missing_columns:
            raise KeyError("Required column not found in " + label + " data: " + str(missing_columns))
        # NOTE: the filter is applied to the whole table, not only to rows of this kind
        for col in needed_columns:
            df = force_skip_pure(df, col)

    if df.empty:
        # Without this guard the row-wise apply below returns a DataFrame for an
        # empty table and the column assignment fails with an unrelated message
        raise ValueError("No rows remained in the VLE data after removing rows with a mole fraction of exactly 0 or 1")

    # Independent frame so the assignment below never targets a slice
    df = df.copy()

    def get_p_Pa(row):
        """ Return the pressure of one row in Pa, from whichever unit column is filled """
        if 'p / Pa' in row and not pandas.isnull(row['p / Pa']):
            return row['p / Pa']
        factors = {'p / Pa': 1.0, 'p / kPa': 1e3, 'p / MPa': 1e6, 'p / GPa': 1e9}
        for k, factor in factors.items():
            if k in row and not pandas.isnull(row[k]):
                return row[k]*factor
        raise ValueError("no pressure was specified; allowed values are: " + str(factors.keys()))
    df['p / Pa'] = df.apply(get_p_Pa, axis=1)

    if output_csv is not None:
        df.to_csv(output_csv, index=False)
    if verbosity > 0:
        print(f"Loaded {len(df)} rows from {path}")
    return df
def load_CRIT(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """
    Loader for critical point data

    Reads ``<dataroot>/CRIT.csv``, keeps the rows for the requested
    identifiers and fills the ``rho / mol/m^3`` column, using NaN where a row
    carries no density at all.

    Parameters
    ----------
    dataroot : str
        Folder containing ``CRIT.csv``.
    identifier, identifiers
        Identifier column prefix and the values permitted in those columns.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
    output_csv : str or None
        If given, the processed table is written to this path.
    verbosity : int
        Print a one-line summary when greater than zero.
    molar_masses : sequence or None
        Two molar masses weighted by ``z_1 / mole frac.`` and
        ``z_2 / mole frac.``; read only for rows given as ``rho / kg/m^3``.
    sep : str
        Column separator of the CSV file.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    KeyError
        If ``T / K`` or ``p / Pa`` is missing from the file.
    """
    crit_path = dataroot+'/CRIT.csv'
    table = read_and_subset(crit_path, identifier=identifier, identifiers=identifiers, apply_skip=apply_skip, sep=sep)

    absent = [name for name in ['T / K', 'p / Pa'] if name not in table]
    if absent:
        raise KeyError("Required column not found in CRIT data: " + str(absent))

    molar_key = 'rho / mol/m^3'
    mass_key = 'rho / kg/m^3'

    def has_value(record, column):
        """ True when ``column`` exists in the row and its cell is not missing """
        return column in record and not pandas.isnull(record[column])

    def get_molar_density(record):
        # A molar density is taken as-is and wins over a mass density
        if has_value(record, molar_key):
            return record[molar_key]
        if has_value(record, mass_key):
            mixture_M = record['z_1 / mole frac.']*molar_masses[0] + record['z_2 / mole frac.']*molar_masses[1]
            return record[mass_key]/mixture_M
        # Neither density is available for this row
        return np.nan

    # Convert to molar density
    table[molar_key] = table.apply(get_molar_density, axis=1)

    if output_csv is not None:
        table.to_csv(output_csv, index=False)

    if verbosity > 0:
        print(f"Loaded {len(table)} rows from {crit_path}")

    return table
# Parse B12 data
def load_B12(dataroot, identifier, identifiers, apply_skip=True, output_csv=None, verbosity=1, molar_masses=None, sep=','):
    """
    Loader for B12 data

    Reads ``<dataroot>/B12.csv`` and keeps the rows for the requested
    identifiers; no further processing is applied.

    Parameters
    ----------
    dataroot : str
        Folder containing ``B12.csv``.
    identifier, identifiers
        Identifier column prefix and the values permitted in those columns.
    apply_skip : bool
        Drop rows flagged in the ``skip`` column (if that column exists).
    output_csv : str or None
        If given, the table is written to this path.
    verbosity, molar_masses
        Accepted for signature parity with the other loaders; not used here.
    sep : str
        Column separator of the CSV file.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the selected rows.
    """
    b12_path = dataroot+'/B12.csv'
    selected = read_and_subset(
        b12_path,
        identifier=identifier,
        identifiers=identifiers,
        apply_skip=apply_skip,
        sep=sep,
    )
    export_requested = output_csv is not None
    if export_requested:
        selected.to_csv(output_csv, index=False)
    return selected.copy()
if __name__ == '__main__':
    # Smoke run: load each data type for the ammonia + water pair from the
    # 'NH3H2O' folder, without exporting anything
    identifiers = ['AMMONIA', 'WATER']
    molar_masses = [CP.PropsSI('molemass', f) for f in identifiers]
    shared_options = dict(
        identifier='FLD',
        identifiers=identifiers,
        molar_masses=molar_masses,
        output_csv=None,
    )
    for loader in (load_PVT, load_SOS, load_VLE, load_CRIT):
        loader('NH3H2O', **shared_options)