Source code for pysme.linelist.vald

# -*- coding: utf-8 -*-
"""
Module for handling linelist data from the VALD3 database (http://vald.astro.uu.se/).


"""
import logging
import re
from io import StringIO
from os.path import dirname, join, exists
from copy import deepcopy

import numpy as np
import pandas as pd
import pybtex.database
from astropy import units as u

from ..abund import Abund
from .linelist import LineList, LineListError

logger = logging.getLogger(__name__)


[docs] class ValdError(LineListError): """Vald Data File Error"""
[docs] class ValdFile(LineList): """Atomic data for a list of spectral lines.""" citation_info = r""" @ARTICLE{2015PhyS...90e4005R, author = {{Ryabchikova}, T. and {Piskunov}, N. and {Kurucz}, R.~L. and {Stempels}, H.~C. and {Heiter}, U. and {Pakhomov}, Yu and {Barklem}, P.~S.}, title = "{A major upgrade of the VALD database}", journal = {Physica Scripta}, year = "2015", month = "May", volume = {90}, number = {5}, eid = {054005}, pages = {054005}, doi = {10.1088/0031-8949/90/5/054005}, adsurl = {https://ui.adsabs.harvard.edu/abs/2015PhyS...90e4005R}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } @ARTICLE{2000BaltA...9..590K, author = {{Kupka}, F.~G. and {Ryabchikova}, T.~A. and {Piskunov}, N.~E. and {Stempels}, H.~C. and {Weiss}, W.~W.}, title = "{VALD-2 -- The New Vienna Atomic Line Database}", journal = {Baltic Astronomy}, keywords = {ATOMIC DATA, METHODS: SPECTROSCOPIC, STARS: ABUNDANCES, STARS: CHEMICALLY PECULIAR}, year = "2000", month = "Jan", volume = {9}, pages = {590-594}, doi = {10.1515/astro-2000-0420}, adsurl = {https://ui.adsabs.harvard.edu/abs/2000BaltA...9..590K}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } @ARTICLE{1999A&AS..138..119K, author = {{Kupka}, F. and {Piskunov}, N. and {Ryabchikova}, T.~A. and {Stempels}, H.~C. and {Weiss}, W.~W.}, title = "{VALD-2: Progress of the Vienna Atomic Line Data Base}", journal = {\aaps}, keywords = {ATOMIC DATA, TECHNIQUES: SPECTROSCOPIC, SUN: ABUNDANCES, STARS: ABUNDANCES, STARS: ATMOSPHERES, STARS: CHEMICALLY PECULIAR}, year = "1999", month = "Jul", volume = {138}, pages = {119-133}, doi = {10.1051/aas:1999267}, adsurl = {https://ui.adsabs.harvard.edu/abs/1999A&AS..138..119K}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } @ARTICLE{1997BaltA...6..244R, author = {{Ryabchikova}, T.~A. and {Piskunov}, N.~E. and {Kupka}, F. and {Weiss}, W.~W.}, title = "{The Vienna Atomic Line Database : Present State and Future Development}", journal = {Baltic Astronomy}, keywords = {DATABASES: ATOMIC LINE PARAMETERS, STELLAR SPECTROSCOPY}, year = "1997", month = "Mar", volume = {6}, pages = {244-247}, doi = {10.1515/astro-1997-0216}, adsurl = {https://ui.adsabs.harvard.edu/abs/1997BaltA...6..244R}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } @ARTICLE{1995A&AS..112..525P, author = {{Piskunov}, N.~E. and {Kupka}, F. and {Ryabchikova}, T.~A. and {Weiss}, W.~W. and {Jeffery}, C.~S.}, title = "{VALD: The Vienna Atomic Line Data Base.}", journal = {\aaps}, keywords = {ATOMIC DATA, STARS: ABUNDANCES, ASTRONOMICAL DATA BASES: MISCELLANEOUS}, year = "1995", month = "Sep", volume = {112}, pages = {525}, adsurl = {https://ui.adsabs.harvard.edu/abs/1995A&AS..112..525P}, adsnote = {Provided by the SAO/NASA Astrophysics Data System} } """ acknowledgement = ( r"This work has made use of the VALD database, operated at Uppsala University," r"the Institute of Astronomy RAS in Moscow, and the University of Vienna." ) def __init__(self, filename, medium=None): self.filename = filename self.atmo = None self.abund = None self.unit = None self.energy_unit = None linelist = self.loads(filename) super().__init__( linelist, lineformat=self.lineformat, medium=self.medium, citation_info=self.citation_info, ) # Convert to desired medium if medium is not None: self.medium = medium
[docs] @staticmethod def load(filename): """ Read line data file from the VALD extract stellar service Parameters ---------- filename : str Name of the VALD linelist file to read Returns ------- vald : ValdFile Parsed vald file """ return ValdFile(filename)
def __getitem__(self, index): if isinstance(index, str) and hasattr(self, index): return getattr(self, index) if isinstance(index, (list, str)): if len(index) == 0: return_list = deepcopy(self) return_list._lines = self._lines.iloc[[]] return_list.nlines = len(return_list._lines) return return_list values = self._lines[index].values if index in self.string_columns: values = values.astype(str) return values if isinstance(index, int): index = slice(index, index + 1) return_list = deepcopy(self) return_list._lines = self._lines.iloc[index].copy() return_list.nlines = len(return_list._lines) return return_list
[docs] def identify_valdtype(self, lines): """Determines whether the file was created with extract_all, extract_stellar, or extract_element and whether it is in long or short format Parameters ---------- lines : list(str) file contents """ header = lines[0].strip().split() if header[0] == "Damping": # short format extract all / extract element return "extract_all", "short" elif header[0] == "Lande": # long format extract all / extract element return "extract_all", "long" else: header = lines[1].strip().split() if header[0] == "Damping": # short format, extract stellar return "extract_stellar", "short" elif header[0] == "Lande": # long format, extract stellar return "extract_stellar", "long" raise ValueError("Could not identify ValdFile type")
[docs] def loads(self, filename): logger.info("Loading VALD file %s", filename) with open(filename, "r") as file: lines = file.readlines() # Check for Warnings while lines[0].lstrip().startswith("WARNING"): logger.warning(f"VALD {lines[0].lstrip()}") lines = lines[1:] # Determine File type and format valdtype, fmt = self.identify_valdtype(lines) self.valdtype = valdtype # Determine the number of lines in the file if valdtype == "extract_stellar": n = self.parse_header(lines[0]) # Skip the info header if extract stellar self.header = lines[0] lines = lines[1:] else: self.header = '' n = self.parse_nlines(lines, fmt, valdtype) # Determine the units and medium in the linelist self.parse_columns(lines[1]) # Split the lines into the different parts try: if fmt == "long": linedata = lines[2 : 2 + n * 4] refdata = linedata[3::4] self._ref_record = lines[2 + n * 4:] if valdtype == "extract_stellar": atmodata = lines[2 + n * 4] abunddata = lines[3 + n * 4 : 21 + n * 4] self._ref_record = lines[21 + n * 4:] elif fmt == "short": linedata = lines[2 : 2 + n] refdata = linedata self._ref_record = lines[2 + n:] if valdtype == "extract_stellar": atmodata = lines[2 + n] abunddata = lines[3 + n : 21 + n] self._ref_record = lines[21 + n:] except IndexError: msg = "Linelist file is shorter than it should be according to the number of lines. Is it incomplete?" logger.error(msg) raise IOError(msg) # Process the individual parts linelist = self.parse_linedata(linedata, fmt=fmt, valdtype=valdtype) if valdtype == "extract_stellar": self.atmo = self.parse_valdatmo(atmodata) self.abund = self.parse_abund(abunddata) linelist['nlte_flag'] = np.nan self.citation_info += self.parse_references(refdata, fmt) return linelist
[docs] def parse_nlines(self, lines, fmt, valdtype): if valdtype == "extract_stellar": pattern = r"^ '\S+',$" else: pattern = r"\* oscillator|References" pattern = re.compile(pattern) for i in range(len(lines)): if re.match(pattern, lines[i]): # Offset by the two header lines n = i - 2 break if fmt == "long": n //= 4 self.nlines = n return self.nlines
[docs] def parse_header(self, line): """ Parse header line from a VALD line data file and sets the internal parameters Parameters ---------- line : str header line of a vald file Raises ------ ValdError If the header is not understood """ words = [w.strip() for w in line.split(",")] # if len(words) < 5 or words[5] != "Wavelength region": # raise ValdError(f"{self.filename} is not a VALD line data file") try: self._wavelo = float(words[0]) self._wavehi = float(words[1]) self.nlines = int(words[2]) self._nlines_proc = int(words[3]) self._vmicro = float(words[4]) except: raise ValdError(f"{self.filename} is not a VALD line data file") return self.nlines
[docs] def parse_columns(self, line): match = re.search(r"WL_(air|vac)\((.*?)\)", line) medium = match.group(1) unit = match.group(2) match = re.search(r"E_low\((.*?)\)", line) if match is None: match = re.search(r"Excit\((.*?)\)", line) energy_unit = match.group(1) if medium == "air": self._medium = "air" elif medium == "vac": self._medium = "vac" else: raise ValueError( "Could not determine the medium that the wavelength is based on (air or vacuum)" ) if unit == "A": self.unit = u.AA elif unit == "nm": self.unit = u.nm elif unit == "cm^-1": self.unit = 1 / u.cm else: raise ValueError("Could not determine the unit of the wavelength") if energy_unit == "eV": self.energy_unit = u.eV elif energy_unit == "cm^-1": self.energy_unit = 1 / u.cm else: raise ValueError("could not determine the unit of the energy levels") return self.medium, self.unit, self.energy_unit
[docs] def parse_linedata(self, lines, fmt="short", valdtype="extract_stellar"): """Parse line data from a VALD line data file Parameters ---------- lines : list of str lines of the input data file fmt : {"short", "long"}, optional linelist format, short format has one line of data per spectral line, while the long format uses four lines per spectral line. The default is "short" Returns ------- linelist : LineList the parsed linelist """ if fmt == "short": if valdtype == "extract_all": names = [ "species", "wlcent", "excit", "gflog", "gamrad", "gamqst", "gamvw", "lande", "reference", ] if valdtype == "extract_stellar": names = [ "species", "wlcent", "excit", "vmic", "gflog", "gamrad", "gamqst", "gamvw", "lande", "depth", "reference", ] elif fmt == "long": names = [ "species", "wlcent", "gflog", "excit", "j_lo", "e_upp", "j_up", "lande_lower", "lande_upper", "lande", "gamrad", "gamqst", "gamvw", ] if valdtype == "extract_stellar": names += ["depth"] term_lower = lines[1::4] term_upper = lines[2::4] comment = lines[3::4] lines = lines[::4] data = StringIO("".join(lines)) linelist = pd.read_csv( data, sep=",", names=names, header=None, quotechar="'", skipinitialspace=True, usecols=range(len(names)), ) # Convert from cm^-1 to eV if self.energy_unit == 1 / u.cm: conversion_factor = 8065.544 linelist["excit"] /= conversion_factor if fmt == "long": linelist["e_upp"] /= conversion_factor if fmt == "long": comment = [c.replace("'", "").strip() for c in comment] linelist["reference"] = comment # Parse energy level terms # Extract Stellar has quotation marks around the levels # extract element does not... if valdtype == "extract_stellar": couple_lower = [t[1:8].strip() for t in term_lower] term_lower = [t.strip()[8:-1].strip() for t in term_lower] couple_upper = [t[1:8].strip() for t in term_upper] term_upper = [t.strip()[8:-1].strip() for t in term_upper] else: couple_lower = [t[:8].strip() for t in term_lower] term_lower = [t.strip()[8:].strip() for t in term_lower] couple_upper = [t[:8].strip() for t in term_upper] term_upper = [t.strip()[8:].strip() for t in term_upper] couple_lower = [ele if ele != '' else ' ' for ele in couple_lower] couple_upper = [ele if ele != '' else ' ' for ele in couple_upper] term_lower = np.char.partition(term_lower, " ")[:, (0, 2)] term_lower = np.char.strip(term_lower) idx = term_lower[:, 1] == "" term_lower[idx, 1] = term_lower[idx, 0] term_lower = np.char.add( np.char.add(term_lower[:, 0], " "), term_lower[:, 1] ) term_upper = np.char.partition(term_upper, " ")[:, (0, 2)] term_upper = np.char.strip(term_upper) idx = term_upper[:, 1] == "" term_upper[idx, 1] = term_upper[idx, 0] term_upper = np.char.add( np.char.add(term_upper[:, 0], " "), term_upper[:, 1] ) linelist["couple_lower"] = couple_lower linelist["term_lower"] = term_lower linelist["couple_upper"] = couple_upper linelist["term_upper"] = term_upper # extract error data error = np.array([s[:10].strip() for s in comment]) error = LineList.parse_line_error( error, linelist["depth"] if valdtype == "extract_stellar" else None, ) linelist["error"] = error # Convert from whatever unit to Angstrom factor = self.unit.to(u.AA) linelist["wlcent"] *= factor self.unit = "Angstrom" self.lineformat = fmt return linelist
[docs] def parse_valdatmo(self, line): """Parse VALD model atmosphere line from a VALD line data file Parameters ---------- line : str line form the model atmosphere Returns ------- atmo : str Name of the model atmosphere Raises ------ ValdError If the line is not from a model atmosphere """ lstr = line.strip() if lstr[0] != "'" or lstr[-2:] != "',": raise ValdError(f"error parsing model atmosphere: {lstr}") return lstr[1:-2]
[docs] def parse_abund(self, lines): """Parse VALD abundance lines from a VALD line data file Parameters ---------- lines : list of str Lines containing the VALD abundance data Returns ------- abund : Abund Parsed abundance data Raises ------ ValdError If the data could not be parsed """ abstr = "".join(["".join(line.split()) for line in lines]) words = [w[1:-1] for w in abstr.split(",")] if len(words) != 100 or words[99] != "END": raise ValdError(f"Error parsing abundances: {abstr}") pattern = [w.split(":") for w in words[:-1]] pattern = {el: float(ab) for el, ab in pattern} monh = 0 return Abund(monh, pattern, type="sme")
[docs] def parse_references(self, lines, fmt): # Search the linelist data for this pattern, e.g: # 1 gf:K14 # 4 KCN' if fmt == "long": idiscard = 45 elif fmt == "short": idiscard = 90 else: raise ValueError pattern = r"\s\d+ (\w+:)?([\w+]+)[\s']" pattern = re.compile(pattern) # Discard the initial part of the line lines = [l[idiscard:] for l in lines] lines = "".join(lines) references = [match.group(2) for match in re.finditer(pattern, lines)] # We only need each reference ones ref = set(references) # Multiple references are seperated by '+' references = [] for r in ref: references += r.split("+") # And make it unique again, if necessary references = set(references) # some data entries are case sensitive, but bibtex is case insnsitive # so remove those and replace them with fixed versions if "LWb" in references: references.add("LWb2") references.remove("LWb") if "LGb" in references: references.add("LGb2") references.remove("LGb") # Get references from bibtex file # TODO: only load this once? But then again, how often will we do this? bibdata = pybtex.database.parse_file(join(dirname(__file__), "VALD3_ref.bib")) # DEBUG: # pybtex.format_from_string(bibdata.to_string("bibtex"), style="plain", output_backend="plaintext") entries = {} for r in references: try: entries[r] = bibdata.entries[r] except KeyError as ex: logger.warning(f"Could not find citation key: {r}") logger.debug(ex) bibdata_filtered = pybtex.database.BibliographyData(entries) return bibdata_filtered.to_string("bibtex")
[docs] def save(self, filename, overwrite=False): ''' Save the line list to a file, in VALD format. ''' list_save_content = [] # Output the header if self.header != '': if len(self._lines) > 0: wavelo = float(np.min(self._lines["wlcent"])) wavehi = float(np.max(self._lines["wlcent"])) else: wavelo = float(getattr(self, "_wavelo", 0.0)) wavehi = float(getattr(self, "_wavehi", 0.0)) vmicro = float(getattr(self, "_vmicro", 0.0)) nlines = int(len(self._lines)) list_save_content.append( f"{wavelo:11.4f}, {wavehi:10.4f},{nlines},{nlines},{vmicro:4.1f}, " "Wavelength region, Lines selected, Lines processed, Vmicro\n" ) # 2. Output each line in wavelength if self.lineformat == 'long': if self.valdtype == 'extract_stellar': line_header = ' Lande factors Damping parameters Central\nSpec Ion WL_air(A) log gf* E_low(eV) J lo E_up(eV) J up lower upper mean Rad. Stark Waals depth' elif self.valdtype == 'extract_all': line_header = ''' Lande factors Damping parameters\nElm Ion WL_air(A) log gf* E_low(eV) J lo E_up(eV) J up lower upper mean Rad. Stark Waals''' elif self.lineformat == 'short': if self.valdtype == 'extract_stellar': line_header = ''' Damping parameters Lande Central\nSpec Ion WL_air(A) Excit(eV) Vmic log gf* Rad. Stark Waals factor depth Reference''' elif self.valdtype == 'extract_all': line_header = ''' Damping parameters Lande\nElm Ion WL_air(A) Excit(eV) log gf* Rad. Stark Waals factor References''' else: raise ValueError('VALD line format not recognized.') list_save_content.append(line_header) if self.lineformat == 'long': if self.valdtype == 'extract_stellar': def line_to_text(row): width = 21 - len(row["species"]) return f"'{row['species']}',{row['wlcent']:{width}.5f},{row['gflog']:7.3f},{row['excit']:8.4f},{row['j_lo']:5.1f},{row['e_upp']:8.4f},{row['j_up']:5.1f},{row['lande_lower']:7.3f},{row['lande_upper']:7.3f},{row['lande']:7.3f},{row['gamrad']:6.3f},{row['gamqst']:6.3f},{row['gamvw']:6.3f},{row['depth']:6.3f},\n' {row['couple_lower']} {row['term_lower']:>83}'\n' {row['couple_upper']} {row['term_upper']:>83}'\n'{row['reference']}'" elif self.valdtype == 'extract_all': def line_to_text(row): width = 20 - len(row["species"]) return f"'{row['species']}',{row['wlcent']:{width}.5f},{row['gflog']:8.3f},{row['excit']:8.4f},{row['j_lo']:5.1f},{row['e_upp']:8.4f},{row['j_up']:5.1f},{row['lande_lower']:7.3f},{row['lande_upper']:7.3f},{row['lande']:7.3f},{row['gamrad']:6.3f},{row['gamqst']:6.3f},{row['gamvw']:6.3f},\n {row['couple_lower']} {row['term_lower']:>83}\n {row['couple_upper']} {row['term_upper']:>83}\n'{row['reference']}'" elif self.lineformat == 'short': if self.valdtype == 'extract_stellar': def line_to_text(row): width = 21 - len(row["species"]) return f"'{row['species']}',{row['wlcent']:{width}.5f},{row['excit']:8.4f},{row['vmic']:4.1f},{row['gflog']:7.3f},{row['gamrad']:6.3f},{row['gamqst']:6.3f},{row['gamvw']:6.3f},{row['lande']:7.3f},{row['depth']:6.3f}, '{row['reference']}'" elif self.valdtype == 'extract_all': def line_to_text(row): width = 20 - len(row["species"]) return f"'{row['species']}',{row['wlcent']:{width}.5f},{row['excit']:9.3f},{row['gflog']:7.3f},{row['gamrad']:6.3f},{row['gamqst']:6.3f},{row['gamvw']:6.3f},{row['lande']:7.3f},'{row['reference']}'" line_text = list(self._lines.apply(line_to_text, axis=1).values) list_save_content += line_text # Only for extract_stellar: add model and abund if self.valdtype == 'extract_stellar': list_save_content.append(f"'{self.atmo}',") pattern = self.abund.get_pattern(type="H=12") abund_text = '' count = 1 for ele in pattern.keys(): abund_text += f"'{ele:<2}:{pattern[ele]:6.2f}'," if ele == 'He' or (count-2) % 6 == 0: abund_text += '\n' count += 1 abund_text += "'END'" list_save_content += [abund_text] # 4. Output references list_save_content += self._ref_record list_save_content = [ele if ele[-1:] == '\n' else ele+'\n' for ele in list_save_content] if exists(filename) and not overwrite: raise FileExistsError(f"The file '{filename}' already exists. Use overwrite=True to overwrite it.") else: # 如果文件不存在,直接写入 with open(filename, 'w') as file: file.writelines(list_save_content)
@staticmethod def _get_ref_pair(vlist): ''' Get the reference tag, description and number pairs. ''' if vlist.lineformat == 'long': ref_pair = [ele[29:].replace('iso:', '').replace('wl:', '').replace('gf:', '').split()[:-1] if 'hfs:' not in ele else ele[29:].replace('iso:', '').replace('wl:', '').replace('gf:', '').split()[:-2] for ele in vlist['reference']] elif vlist.lineformat == 'short': ref_pair = [ele.replace('iso:', '').replace('wl:', '').replace('gf:', '').split()[:-1] if 'hfs:' not in ele else ele.replace('iso:', '').replace('wl:', '').replace('gf:', '').split()[:-2] for ele in vlist['reference']] ref_pair = [item for sublist in ref_pair for item in sublist] pairs = [(ref_pair[i+1]+'|'+ref_pair[i], int(ref_pair[i])) for i in range(0, len(ref_pair), 2)] # return pairs ref_pair = list(set(pairs)) ref_pair = sorted(ref_pair, key=lambda x: x[1]) ref_pair_dict = {} ref_record_only = [ele[5:] for ele in vlist._ref_record[2:]] for ele in ref_pair: ref_pair_dict[ele[0].split('|')[0] + '|' + ref_record_only[ele[1]-1]] = [ref_record_only[ele[1]-1], ele[1]] return ref_pair_dict @staticmethod def _merge_ref_pair(ref_pair_dict_1, ref_pair_dict_2): ''' Merge two ref_pair ''' new_count = len(ref_pair_dict_1) + 1 merge_ref_pair_dict = ref_pair_dict_1.copy() for key in ref_pair_dict_2.keys(): if key in ref_pair_dict_1.keys(): ref_pair_dict_2[key].append(ref_pair_dict_1[key][-1]) else: merge_ref_pair_dict[key+'_list2'] = [ref_pair_dict_2[key][0], new_count] ref_pair_dict_2[key].append(new_count) new_count += 1 return merge_ref_pair_dict, ref_pair_dict_2 @staticmethod def _renew_ref_number(ref_string, ref_pair, lineformat): if lineformat == 'long': t = [ref_string[:29], ref_string[29:].split()] elif lineformat == 'short': t = [' ', ref_string.split()] for i in range(0, len(t[1]), 2): if i <= len(t[1])-3: try: t[1][i] = str(ref_pair[t[1][i+1].replace('iso:', '').replace('wl:', '').replace('gf:', '') + '-' + t[1][i]][-1]) except: t[1][i] = '00' t[1] = ' '.join(t[1]) t = ' '.join(t) return t
[docs] @staticmethod def merge_list(vlist_1, vlist_2): ''' Combine two VALD line list. The two line list must have the same short/long format. Note that the code will use the metadata from vlist_1 as the ones in the combined line list. Lines with same 'species', 'wlcent', 'gflog' and 'excit' will be treated as duplicated lines and removed. Note: reference mismatch is known to be happen during line list merge. ''' # Check the format of the line lists. if vlist_1.lineformat != vlist_2.lineformat: raise ValueError('lineformat of the line lists not the same.') if vlist_1.medium != vlist_2.medium: raise ValueError('medium of the line lists not the same.') if vlist_1.unit != vlist_2.unit: raise ValueError('unit of the line lists not the same.') vlist_1_use, vlist_2_use = deepcopy(vlist_1), deepcopy(vlist_2) ref_pair_1 = ValdFile._get_ref_pair(vlist_1_use) ref_pair_2 = ValdFile._get_ref_pair(vlist_2_use) ref_pair_1, ref_pair_2 = ValdFile._merge_ref_pair(ref_pair_1, ref_pair_2) ref_pair_2_final = {} for key in ref_pair_2.keys(): ref_pair_2_final[key.split('|')[0]+'-'+str(ref_pair_2[key][1])] = ref_pair_2[key] # Replace the reference numbers in vlist_2 vlist_2_use._lines['reference'] = vlist_2_use._lines['reference'].apply(ValdFile._renew_ref_number, ref_pair=ref_pair_2_final, lineformat=vlist_2_use.lineformat) ref_record_combined = [] count = 1 num_length = len(str(len(ref_pair_1))) for key in ref_pair_1.keys(): ref_record_combined.append(f"{count:{num_length}.0f}. {ref_pair_1[key][0]}") count += 1 vlist_1_use._ref_record = vlist_1_use._ref_record[:2] + ref_record_combined # Concat the two dfs vlist_1_use._lines = pd.concat([vlist_1_use._lines, vlist_2_use._lines]) vlist_1_use._lines = vlist_1_use._lines[~vlist_1_use._lines.duplicated(subset=['species', 'wlcent', 'gflog', 'excit'], keep='first')].sort_values('wlcent').reset_index(drop=True) vlist_1_use.nlines = len(vlist_1_use) if vlist_1_use.valdtype == 'extract_stellar': header_split = vlist_1_use.header.split(',') header_split[2] = f' {len(vlist_1_use)}' vlist_1_use.header = ','.join(header_split) return vlist_1_use