373 lines
15 KiB
Python
373 lines
15 KiB
Python
#!/usr/bin/env python3
|
||
|
||
# gaia-stardb: Processing Gaia DR2 for celestia.Sci/Celestia
|
||
# Copyright (C) 2019–2020 Andrew Tribick
|
||
#
|
||
# This program is free software; you can redistribute it and/or modify
|
||
# it under the terms of the GNU General Public License as published by
|
||
# the Free Software Foundation; either version 2 of the License, or
|
||
# (at your option) any later version.
|
||
#
|
||
# This program is distributed in the hope that it will be useful,
|
||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
# GNU General Public License for more details.
|
||
#
|
||
# You should have received a copy of the GNU General Public License along
|
||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||
|
||
"""Makes the star database."""
|
||
|
||
import contextlib
|
||
import gzip
|
||
import os
|
||
import struct
|
||
|
||
from zipfile import ZipFile, ZIP_DEFLATED
|
||
|
||
import numpy as np
|
||
import astropy.units as u
|
||
|
||
from astropy.table import MaskedColumn, Table, join, unique, vstack
|
||
|
||
from parse_hip import process_hip
|
||
from parse_tyc import process_tyc
|
||
from parse_utils import WorkaroundCDSReader, open_cds_tarfile
|
||
from spparse import CEL_UNKNOWN_STAR, parse_spectrum
|
||
|
||
VERSION = "1.0.4"
|
||
|
||
# remove the following objects from the output
|
||
|
||
EXCLUSIONS = [
|
||
60936, # quasar 3C 273
|
||
114110, # non-existent star (see HIP1 errata)
|
||
114176, # non-existent star (see HIP1 errata)
|
||
]
|
||
|
||
# temperatures from star.cpp, spectral types O3-M9
|
||
|
||
TEFF_SPEC = np.array([
|
||
52500, 48000, 44500, 41000, 38000, 35800, 33000,
|
||
30000, 25400, 22000, 18700, 17000, 15400, 14000, 13000, 11900, 10500,
|
||
9520, 9230, 8970, 8720, 8460, 8200, 8020, 7850, 7580, 7390,
|
||
7200, 7050, 6890, 6740, 6590, 6440, 6360, 6280, 6200, 6110,
|
||
6030, 5940, 5860, 5830, 5800, 5770, 5700, 5630, 5570, 5410,
|
||
5250, 5080, 4900, 4730, 4590, 4350, 4200, 4060, 3990, 3920,
|
||
3850, 3720, 3580, 3470, 3370, 3240, 3050, 2940, 2640, 2000])
|
||
|
||
TEFF_BINS = (TEFF_SPEC[:-1] + TEFF_SPEC[1:]) // 2
|
||
|
||
parse_spectrum_vec = np.vectorize(parse_spectrum, otypes=[np.uint16]) # pylint: disable=invalid-name
|
||
|
||
CEL_SPECS = parse_spectrum_vec(['OBAFGKM'[i//10]+str(i%10) for i in range(3, 70)])
|
||
|
||
def load_ubvri() -> Table:
|
||
"""Load UBVRI Teff calibration from VizieR archive."""
|
||
print('Loading UBVRI calibration')
|
||
with open_cds_tarfile(os.path.join('vizier', 'ubvriteff.tar.gz')) as tf:
|
||
return tf.read_gzip('table3.dat', ['V-K', 'B-V', 'V-I', 'J-K', 'H-K', 'Teff'])
|
||
|
||
def parse_spectra(data: Table) -> Table:
|
||
"""Parse the spectral types into the celestia.Sci format."""
|
||
print('Parsing spectral types')
|
||
data['SpType'] = data['SpType'].filled('')
|
||
sptypes = unique(data['SpType',])
|
||
sptypes['CelSpec'] = parse_spectrum_vec(sptypes['SpType'])
|
||
return join(data, sptypes)
|
||
|
||
def estimate_magnitudes(data: Table) -> None:
|
||
"""Estimates magnitudes and color indices from G magnitude and BP-RP.
|
||
|
||
Formula used is from Evans et al. (2018) "Gaia Data Release 2: Photometric
|
||
content and validation".
|
||
"""
|
||
print("Computing missing magnitudes and color indices")
|
||
|
||
bp_rp = data['bp_rp'].filled(0)
|
||
bp_rp2 = bp_rp**2
|
||
|
||
data['Vmag'] = MaskedColumn(
|
||
data['Vmag'].filled(
|
||
data['phot_g_mean_mag'].filled(np.nan) + 0.01760 + bp_rp*0.006860 + bp_rp2*0.1732))
|
||
data['e_Vmag'] = MaskedColumn(data['e_Vmag'].filled(0.045858))
|
||
data['Vmag'].mask = np.isnan(data['Vmag'])
|
||
data['e_Vmag'].mask = data['Vmag'].mask
|
||
|
||
bp_rp = data['bp_rp'].filled(np.nan)
|
||
bp_rp2 = bp_rp**2
|
||
|
||
imag = data['phot_g_mean_mag'].filled(np.nan) - 0.02085 - bp_rp*0.7419 + bp_rp2*0.09631
|
||
e_imag = np.where(np.isnan(imag), np.nan, 0.04956)
|
||
|
||
f_bmag = data['Bmag'].filled(np.nan)
|
||
f_vmag = data['Vmag'].filled(np.nan)
|
||
f_jmag = data['Jmag'].filled(np.nan)
|
||
f_hmag = data['Hmag'].filled(np.nan)
|
||
f_kmag = data['Kmag'].filled(np.nan)
|
||
f_e_bmag = data['e_Bmag'].filled(np.nan)
|
||
f_e_vmag = data['e_Vmag'].filled(np.nan)
|
||
f_e_jmag = data['e_Jmag'].filled(np.nan)
|
||
f_e_hmag = data['e_Hmag'].filled(np.nan)
|
||
f_e_kmag = data['e_Kmag'].filled(np.nan)
|
||
|
||
data['B-V'] = MaskedColumn(data['B-V'].filled(f_bmag - f_vmag))
|
||
data['e_B-V'] = MaskedColumn(data['e_B-V'].filled(np.sqrt(f_e_bmag**2 + f_e_vmag**2)))
|
||
data['V-I'] = MaskedColumn(data['V-I'].filled(f_vmag - imag))
|
||
data['e_V-I'] = MaskedColumn(data['e_V-I'].filled(np.sqrt(f_e_vmag**2 + e_imag**2)))
|
||
data['V-K'] = MaskedColumn(f_vmag - f_kmag)
|
||
data['e_V-K'] = MaskedColumn(np.sqrt(f_e_vmag**2 + f_e_kmag**2))
|
||
data['J-K'] = MaskedColumn(f_jmag - f_kmag)
|
||
data['e_J-K'] = MaskedColumn(np.sqrt(f_e_jmag**2 + f_e_kmag**2))
|
||
data['H-K'] = MaskedColumn(f_hmag - f_kmag)
|
||
data['e_H-K'] = MaskedColumn(np.sqrt(f_e_hmag**2 + f_e_kmag**2))
|
||
|
||
data['B-V'].mask = np.logical_or(data['B-V'].mask, np.isnan(data['B-V']))
|
||
data['e_B-V'].mask = np.logical_or(data['e_B-V'].mask, np.isnan(data['e_B-V']))
|
||
data['V-I'].mask = np.logical_or(data['V-I'].mask, np.isnan(data['V-I']))
|
||
data['e_V-I'].mask = np.logical_or(data['e_V-I'].mask, np.isnan(data['e_V-I']))
|
||
data['V-K'].mask = np.isnan(data['V-K'])
|
||
data['e_V-K'].mask = np.isnan(data['e_V-K'])
|
||
data['J-K'].mask = np.isnan(data['J-K'])
|
||
data['e_J-K'].mask = np.isnan(data['e_J-K'])
|
||
data['H-K'].mask = np.isnan(data['H-K'])
|
||
data['e_H-K'].mask = np.isnan(data['e_H-K'])
|
||
|
||
data.remove_columns(['Bmag', 'e_Bmag', 'e_Vmag', 'Jmag', 'e_Jmag', 'Hmag', 'e_Hmag',
|
||
'Kmag', 'e_Kmag'])
|
||
|
||
def estimate_temperatures(data: Table) -> None:
|
||
"""Estimate the temperature of stars."""
|
||
ubvri_data = load_ubvri()
|
||
print('Estimating temperatures from color indices')
|
||
|
||
indices = Table(
|
||
[
|
||
data['B-V'].filled(np.nan),
|
||
data['V-I'].filled(np.nan),
|
||
data['V-K'].filled(np.nan),
|
||
data['J-K'].filled(np.nan),
|
||
data['H-K'].filled(np.nan),
|
||
np.maximum(data['e_B-V'].filled(np.nan), 0.001),
|
||
np.maximum(data['e_V-I'].filled(np.nan), 0.001),
|
||
np.maximum(data['e_V-K'].filled(np.nan), 0.001),
|
||
np.maximum(data['e_J-K'].filled(np.nan), 0.001),
|
||
np.maximum(data['e_H-K'].filled(np.nan), 0.001),
|
||
],
|
||
names=['B-V','V-I','V-K','J-K','H-K',
|
||
'e_B-V','e_V-I','e_V-K','e_J-K','e_H-K'])
|
||
|
||
weights = np.full_like(data['HIP'], 0, dtype=np.float64)
|
||
teffs = np.full_like(data['HIP'], 0, dtype=np.float64)
|
||
for row in ubvri_data:
|
||
sumsq = np.maximum(
|
||
np.nan_to_num(((indices['B-V']-row['B-V'])/indices['e_B-V'])**2)
|
||
+ np.nan_to_num(((indices['V-K']-row['V-K'])/indices['e_V-K'])**2)
|
||
+ np.nan_to_num(((indices['J-K']-row['J-K'])/indices['e_J-K'])**2)
|
||
+ np.nan_to_num(((indices['V-I']-row['V-I'])/indices['e_V-I'])**2)
|
||
+ np.nan_to_num(((indices['H-K']-row['H-K'])/indices['e_H-K'])**2), 0.001)
|
||
teffs += row['Teff'] / sumsq
|
||
weights += 1.0 / sumsq
|
||
|
||
data['teff_est'] = teffs / weights
|
||
data['teff_est'].unit = u.K
|
||
|
||
def estimate_spectra(data: Table) -> Table:
|
||
"""Estimate the spectral type of stars."""
|
||
no_teff = data[data['teff_val'].mask]
|
||
# temporarily disable no-member error in pylint, as it cannot see the reduce method
|
||
# pylint: disable=no-member
|
||
has_indices = np.logical_and.reduce((no_teff['B-V'].mask,
|
||
no_teff['V-I'].mask,
|
||
no_teff['V-K'].mask,
|
||
no_teff['J-K'].mask,
|
||
no_teff['H-K'].mask))
|
||
# pylint: enable=no-member
|
||
no_teff = no_teff[np.logical_not(has_indices)]
|
||
estimate_temperatures(no_teff)
|
||
data = join(data,
|
||
no_teff['HIP', 'teff_est'],
|
||
keys=['HIP'],
|
||
join_type='left')
|
||
data['teff_val'] = data['teff_val'].filled(data['teff_est'].filled(np.nan))
|
||
data = data[np.logical_not(np.isnan(data['teff_val']))]
|
||
data['CelSpec'] = CEL_SPECS[np.digitize(data['teff_val'], TEFF_BINS)]
|
||
return data
|
||
|
||
def load_sao() -> Table:
|
||
"""Loads the SAO catalog."""
|
||
print("Loading SAO")
|
||
|
||
with open(os.path.join('vizier', 'sao.readme'), 'r') as readme:
|
||
reader = WorkaroundCDSReader('sao.dat', ['SAO', 'HD'], [np.int64, np.int64], readme)
|
||
|
||
with gzip.open(os.path.join('vizier', 'sao.dat.gz'), 'rt', encoding='ascii') as f:
|
||
data = reader.read(f)
|
||
|
||
data = unique(data.group_by('SAO'), keys=['HD'])
|
||
data = unique(data.group_by('HD'), keys=['SAO'])
|
||
return data
|
||
|
||
def merge_all() -> Table:
|
||
"""Merges the HIP and TYC data."""
|
||
hip_data = process_hip()
|
||
|
||
# extract the non-Gaia sources to make the merging easier
|
||
non_gaia = hip_data[hip_data['source_id'].mask]
|
||
|
||
# merge object data for objects in both catalogues
|
||
hip_data = join(hip_data[np.logical_not(hip_data['source_id'].mask)],
|
||
process_tyc(),
|
||
keys=['source_id'],
|
||
table_names=['hip', 'tyc'],
|
||
join_type='outer')
|
||
|
||
# Mask blank spectral type and component identifiers
|
||
for str_col in (c for c in hip_data.colnames if hip_data[c].dtype.kind == 'U'):
|
||
hip_data[str_col].mask = np.logical_or(hip_data[str_col].mask, hip_data[str_col] == '')
|
||
|
||
prefer_tyc = {'HD', 'SAO', 'Comp'}
|
||
|
||
for base_col in (c[:-4] for c in hip_data.colnames if c.endswith('_hip')):
|
||
hip_col = base_col + '_hip'
|
||
tyc_col = base_col + '_tyc'
|
||
hip_data.rename_column(hip_col, base_col)
|
||
if isinstance(hip_data[base_col], MaskedColumn):
|
||
mask = np.logical_and(hip_data[base_col].mask, hip_data[tyc_col].mask)
|
||
if base_col in prefer_tyc:
|
||
base_data = hip_data[tyc_col].filled(hip_data[base_col])
|
||
else:
|
||
base_data = hip_data[base_col].filled(hip_data[tyc_col])
|
||
hip_data[base_col] = MaskedColumn(base_data, mask=mask)
|
||
hip_data.remove_column(tyc_col)
|
||
|
||
hip_data['HIP'] = hip_data['HIP'].filled(hip_data['TYC'])
|
||
hip_data.remove_columns('TYC')
|
||
|
||
# Add the non-Gaia stars back into the dataset
|
||
hip_data = vstack([hip_data, non_gaia], join_type='outer', metadata_conflicts='silent')
|
||
|
||
# Merge SAO, preferring the values from the SAO catalogue
|
||
sao = load_sao()
|
||
sao = sao[np.isin(sao['HD'], hip_data[np.logical_not(hip_data['HD'].mask)]['HD'])]
|
||
hip_data['SAO'].mask = np.logical_or(hip_data['SAO'].mask,
|
||
np.isin(hip_data['SAO'], sao['SAO']))
|
||
|
||
hd_sao = join(hip_data[np.logical_not(hip_data['HD'].mask)],
|
||
sao,
|
||
keys=['HD'],
|
||
table_names=['xref', 'sao'],
|
||
join_type='left')
|
||
hd_sao.rename_column('SAO_xref', 'SAO')
|
||
hd_sao['SAO'] = MaskedColumn(hd_sao['SAO_sao'].filled(hd_sao['SAO']),
|
||
mask=np.logical_and(hd_sao['SAO'].mask,
|
||
hd_sao['SAO_sao'].mask))
|
||
hd_sao.remove_column('SAO_sao')
|
||
|
||
return vstack([hip_data[hip_data['HD'].mask], hd_sao], join_type='exact')
|
||
|
||
OBLIQUITY = np.radians(23.4392911)
|
||
COS_OBLIQUITY = np.cos(OBLIQUITY)
|
||
SIN_OBLIQUITY = np.sin(OBLIQUITY)
|
||
ROT_MATRIX = np.array([[1, 0, 0],
|
||
[0, COS_OBLIQUITY, SIN_OBLIQUITY],
|
||
[0, -SIN_OBLIQUITY, COS_OBLIQUITY]])
|
||
|
||
def process_data() -> Table:
|
||
"""Processes the missing data values."""
|
||
data = merge_all()
|
||
data = data[np.logical_not(data['dist_use'].mask)]
|
||
data = data[np.isin(data['HIP'], EXCLUSIONS, invert=True)]
|
||
estimate_magnitudes(data)
|
||
data = parse_spectra(data)
|
||
unknown_spectra = data[data['CelSpec'] == CEL_UNKNOWN_STAR]['HIP', 'teff_val', 'B-V', 'e_B-V',
|
||
'V-I', 'e_V-I', 'V-K', 'e_V-K',
|
||
'J-K', 'e_J-K', 'H-K', 'e_H-K']
|
||
unknown_spectra = estimate_spectra(unknown_spectra)
|
||
data = join(data,
|
||
unknown_spectra['HIP', 'CelSpec'],
|
||
keys=['HIP'],
|
||
join_type='left',
|
||
table_names=['data', 'est'])
|
||
data['CelSpec'] = np.where(data['CelSpec_data'] == CEL_UNKNOWN_STAR,
|
||
data['CelSpec_est'].filled(CEL_UNKNOWN_STAR),
|
||
data['CelSpec_data'])
|
||
data.remove_columns(['phot_g_mean_mag', 'bp_rp', 'teff_val', 'SpType', 'B-V', 'e_B-V', 'V-I',
|
||
'e_V-I', 'V-K', 'e_V-K', 'J-K', 'e_J-K', 'H-K', 'e_H-K', 'CelSpec_est',
|
||
'CelSpec_data'])
|
||
|
||
data['Vmag_abs'] = data['Vmag'] - 5*(np.log10(data['dist_use'])-1)
|
||
|
||
print('Converting coordinates to ecliptic frame')
|
||
|
||
data['ra'].convert_unit_to(u.rad)
|
||
data['dec'].convert_unit_to(u.rad)
|
||
data['dist_use'].convert_unit_to(u.lyr)
|
||
|
||
coords = np.matmul(ROT_MATRIX,
|
||
np.array([data['dist_use']*np.cos(data['ra'])*np.cos(data['dec']),
|
||
data['dist_use']*np.sin(data['dec']),
|
||
-data['dist_use']*np.sin(data['ra'])*np.cos(data['dec'])]))
|
||
data['x'] = coords[0]
|
||
data['y'] = coords[1]
|
||
data['z'] = coords[2]
|
||
|
||
data['x'].unit = u.lyr
|
||
data['y'].unit = u.lyr
|
||
data['z'].unit = u.lyr
|
||
|
||
return data
|
||
|
||
def write_starsdat(data: Table, outfile: str) -> None:
|
||
"""Write the stars.dat file."""
|
||
print('Writing stars.dat')
|
||
with open(outfile, 'wb') as f:
|
||
f.write(struct.pack('<8sHL', b'CELSTARS', 0x0100, len(data)))
|
||
print(f' Writing {len(data)} records')
|
||
fmt = struct.Struct('<L3fhH')
|
||
for hip, x, y, z, vmag_abs, celspec in zip(data['HIP'], data['x'], data['y'], data['z'],
|
||
data['Vmag_abs'], data['CelSpec']):
|
||
f.write(fmt.pack(hip, x, y, z, int(round(vmag_abs*256)), celspec))
|
||
|
||
def write_xindex(data: Table, field: str, outfile: str) -> None:
|
||
"""Write a cross-index file."""
|
||
print('Writing '+field+' cross-index')
|
||
print(' Extracting cross-index data')
|
||
data = data[np.logical_not(data[field].mask)]['HIP', 'Comp', field]
|
||
data['Comp'] = data['Comp'].filled('')
|
||
data = unique(data.group_by([field, 'Comp', 'HIP']), keys=[field])
|
||
print(f' Writing {len(data)} records')
|
||
with open(outfile, 'wb') as f:
|
||
f.write(struct.pack('<8sH', b'CELINDEX', 0x0100))
|
||
fmt = struct.Struct('<2L')
|
||
for hip, cat in zip(data['HIP'], data[field]):
|
||
f.write(fmt.pack(cat, hip))
|
||
|
||
def make_stardb() -> None:
|
||
"""Make the Celestia star database files."""
|
||
data = process_data()
|
||
|
||
with contextlib.suppress(FileExistsError):
|
||
os.mkdir('output')
|
||
|
||
write_starsdat(data, os.path.join('output', 'stars.dat'))
|
||
|
||
xindices = [
|
||
('HD', 'hdxindex.dat'),
|
||
('SAO', 'saoxindex.dat')
|
||
]
|
||
|
||
for fieldname, outfile in xindices:
|
||
write_xindex(data, fieldname, os.path.join('output', outfile))
|
||
|
||
print("Creating archive")
|
||
archivename = f'celestia-gaia-stardb-{VERSION}'
|
||
with ZipFile(f'{archivename}.zip', 'w', compression=ZIP_DEFLATED, compresslevel=9) as zf:
|
||
contents = ['stars.dat', 'hdxindex.dat', 'saoxindex.dat', 'LICENSE.txt', 'CREDITS.md']
|
||
for f in contents:
|
||
zf.write(os.path.join('output', f), arcname=os.path.join(archivename, f))
|
||
|
||
if __name__ == '__main__':
|
||
make_stardb()
|