celestia/src/tools/celestia-gaia-stardb/download_data.py

330 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#!/usr/bin/env python3
# gaia-stardb: Processing Gaia DR2 for celestia.Sci/Celestia
# Copyright (C) 2019-2020 Andrew Tribick
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Routines for downloading the data files."""
import contextlib
import getpass
import os
from zipfile import ZipFile
import numpy as np
import requests
import astropy.io.ascii as io_ascii
import astropy.io.votable as votable
from astropy import units
from astropy.table import Table, join, unique, vstack
from astroquery.gaia import Gaia
from astroquery.utils.tap import Tap
from astroquery.xmatch import XMatch
from parse_utils import open_cds_tarfile
def yesno(prompt: str, default: bool=False) -> bool:
    """Ask a yes/no question on the console and return the answer.

    The prompt is suffixed with ``(Y/n)`` or ``(y/N)`` so that the capital
    letter shows which answer an empty reply selects. Only ``y``, ``Y``,
    ``n``, ``N`` or an empty line are accepted; any other reply causes the
    question to be asked again.
    """
    hint = '(Y/n)' if default else '(y/N)'
    full_prompt = f'{prompt} {hint}: '
    # Every accepted reply and the value it maps to; the empty reply
    # selects the caller-supplied default.
    accepted = {'': default, 'y': True, 'Y': True, 'n': False, 'N': False}
    while True:
        reply = input(full_prompt)
        if reply in accepted:
            return accepted[reply]
def proceed_checkfile(filename: str) -> bool:
    """Decide whether `filename` may be (re)created.

    Returns True when the file does not exist, or when it exists and the
    user agrees to replace it (in which case the existing file is removed).
    Returns False when the user declines to replace an existing file.
    """
    if not os.path.exists(filename):
        return True
    if not yesno(f'{filename} already exists, replace?'):
        return False
    # The file may have disappeared between the check and the removal.
    with contextlib.suppress(FileNotFoundError):
        os.remove(filename)
    return True
def download_file(outfile_name: str, url: str) -> bool:
    """Download a file using requests.

    The user is asked for confirmation (via `proceed_checkfile`) if
    `outfile_name` already exists.

    Returns True if the file was downloaded and written, False if the
    download was skipped or the server did not answer with HTTP 200.
    """
    if not proceed_checkfile(outfile_name):
        return False

    print(f'Downloading {url}')
    chunk_size = 1024 * 1024
    # The context manager releases the streamed connection on every path.
    with requests.get(url, stream=True) as response:
        if response.status_code != 200:
            print('Failed to download')
            return False
        with open(outfile_name, 'wb') as f:
            # Copy from the raw stream in chunks so the whole archive is
            # never held in memory; the bytes are written as received.
            while True:
                chunk = response.raw.read(chunk_size)
                if not chunk:
                    break
                f.write(chunk)
    return True
# --- GAIA DATA DOWNLOAD ---
def download_gaia_data(colname: str, xindex_table: str, outfile_name: str) -> None:
    """Run the Gaia DR2 query for a cross-index table and save it as CSV.

    `xindex_table` is the table providing the `source_id` and
    `original_ext_source_id` columns; the latter is exposed in the output
    under the name `colname`. The query text is printed before it is
    submitted as an asynchronous job whose output goes to `outfile_name`.
    The job is removed from the archive whether or not saving succeeds.
    """
    adql = f"""SELECT
x.source_id, x.original_ext_source_id AS {colname},
g.ra, g.dec, g.parallax, g.parallax_error, g.pmra,
g.pmdec, g.phot_g_mean_mag, g.bp_rp, g.teff_val,
d.r_est, d.r_lo, d.r_hi
FROM
{xindex_table} x
JOIN gaiadr2.gaia_source g ON g.source_id = x.source_id
LEFT JOIN external.gaiadr2_geometric_distance d ON d.source_id = x.source_id"""
    print(adql)

    job_options = dict(dump_to_file=True,
                       output_file=outfile_name,
                       output_format='csv')
    gaia_job = Gaia.launch_job_async(adql, **job_options)
    try:
        gaia_job.save_results()
    finally:
        Gaia.remove_jobs(gaia_job.jobid)
# Location of the zipped Hipparcos-to-Gaia DR2 cone search results.
CONESEARCH_URL = (
    'https://www.cosmos.esa.int/documents/29201/1769576/Hipparcos2GaiaDR2coneSearch.zip'
)
def download_gaia_hip(username: str) -> None:
    """Download the Gaia DR2 data for HIP stars from the Gaia archive.

    Builds a HIP -> Gaia DR2 mapping from the `hipgpma` VizieR archive and
    the cone search zip file, uploads it as the user table `hipgpma`, runs
    the Gaia query against it and finally deletes the user table again.
    `username` is used to build the qualified name of the uploaded table.
    """
    result_csv = os.path.join('gaia', 'gaiadr2_hip-result.csv')
    if not proceed_checkfile(result_csv):
        return

    cone_zip = os.path.join('gaia', 'hip2conesearch.zip')
    if proceed_checkfile(cone_zip):
        download_file(cone_zip, CONESEARCH_URL)

    # the gaiadr2.hipparcos2_best_neighbour table misses a large number of HIP stars that are
    # actually present, so use the mapping from Kervella et al. (2019) "Binarity of Hipparcos
    # stars from Gaia pm anomaly" instead.
    pm_archive = os.path.join('vizier', 'hipgpma.tar.gz')
    with open_cds_tarfile(pm_archive) as tf:
        pm_map = unique(tf.read_gzip('hipgpma.dat', ['HIP', 'GDR2']))

    with ZipFile(cone_zip, 'r') as archive, \
            archive.open('Hipparcos2GaiaDR2coneSearch.csv', 'r') as cone_csv:
        cone_map = io_ascii.read(cone_csv,
                                 format='csv',
                                 names=['HIP', 'GDR2', 'dist'],
                                 include_names=['HIP', 'GDR2'])
    cone_map = unique(cone_map)

    # Outer join keeps HIP entries from either source; where the pm-anomaly
    # identifier is masked, the cone search identifier is used instead.
    merged = join(pm_map, cone_map,
                  join_type='outer', keys='HIP', table_names=['pm', 'cone'])
    merged['GDR2'] = merged['GDR2_pm'].filled(merged['GDR2_cone'])
    merged.remove_columns(['GDR2_pm', 'GDR2_cone'])
    # Use the column names that download_gaia_data's query selects.
    merged.rename_column('HIP', 'original_ext_source_id')
    merged.rename_column('GDR2', 'source_id')

    Gaia.upload_table(upload_resource=merged, table_name='hipgpma')
    try:
        download_gaia_data('hip_id', f'user_{username}.hipgpma', result_csv)
    finally:
        Gaia.delete_user_table('hipgpma')
def _load_gaia_tyc_ids(filename: str) -> Table:
    """Load the TYC identifiers listed in a Gaia cross-match CSV file.

    The file must have a header row containing a `tyc2_id` column whose
    values have the form ``TYC1-TYC2-TYC3``. Rows that are too short to
    contain that column (e.g. blank lines) are skipped.

    Returns a table with the integer columns `TYC1`, `TYC2` and `TYC3`.

    Raises ValueError if the header has no `tyc2_id` column or if an
    identifier component is not an integer.
    """
    import csv  # local import: only this loader needs the CSV parser

    tyc1 = []
    tyc2 = []
    tyc3 = []
    # newline='' lets the csv module handle line endings itself, so the
    # last header cell no longer carries a trailing newline and the column
    # is found wherever it sits in the row.
    with open(filename, 'r', newline='') as f:
        reader = csv.reader(f)
        header = next(reader, [])
        col_idx = header.index('tyc2_id')
        for row in reader:
            try:
                tyc2_id = row[col_idx]
            except IndexError:
                continue
            parts = tyc2_id.split('-')
            tyc1.append(int(parts[0]))
            tyc2.append(int(parts[1]))
            tyc3.append(int(parts[2]))

    return Table([tyc1, tyc2, tyc3], names=['TYC1','TYC2','TYC3'], dtype=('i4', 'i4', 'i4'))
def _load_ascc_tyc_ids(filename: str) -> Table:
    """Load the TYC identifiers from every `cc*` data file of the ASCC archive.

    Only archive members located directly under ``./`` whose file name
    starts with ``cc`` are read. The `TYC1`, `TYC2` and `TYC3` columns of
    all such members are stacked into one table.

    Returns None when the archive contains no matching member.
    """
    sections = []
    with open_cds_tarfile(filename) as tf:
        # tf.tf is iterated for archive members exposing a `name`
        # (presumably the underlying tarfile object; confirm in parse_utils).
        for data_file in tf.tf:
            dirname, basename = os.path.split(data_file.name)
            if dirname != '.' or not basename.startswith('cc'):
                continue
            sections.append(tf.read_gzip(
                os.path.splitext(basename)[0],
                ['TYC1', 'TYC2', 'TYC3'],
                readme_name='cc*.dat'))

    if not sections:
        return None
    # Stack once at the end: stacking inside the loop would copy all the
    # previously accumulated rows again for every section.
    return vstack(sections, join_type='exact')
def get_missing_tyc_ids(tyc_file: str, ascc_file: str) -> Table:
    """Find the ASCC TYC ids that are absent from the Gaia cross-match.

    `tyc_file` is the Gaia cross-match CSV and `ascc_file` the ASCC
    archive. Returns a single-column table (`id`) of strings of the form
    ``TYC <TYC1>-<TYC2>-<TYC3>``.
    """
    print("Finding missing TYC ids in ASCC")
    ascc_ids = unique(_load_ascc_tyc_ids(ascc_file))
    gaia_ids = _load_gaia_tyc_ids(tyc_file)

    # Flag the Gaia rows; after the left join, ASCC rows without a Gaia
    # partner have the flag masked, which is then filled with False.
    gaia_ids['in_gaia'] = True
    merged = join(ascc_ids, gaia_ids, join_type='left')
    merged['in_gaia'] = merged['in_gaia'].filled(False)

    absent = merged[np.logical_not(merged['in_gaia'])]
    absent = absent[absent['TYC1'] != 0] # remove invalid entries

    labels = [f"TYC {row['TYC1']}-{row['TYC2']}-{row['TYC3']}" for row in absent]
    return Table([labels], names=['id'])
def download_gaia_tyc(username: str) -> None:
    """Download the Gaia DR2 data for TYC stars from the Gaia archive.

    Runs in three stages, each of which is skipped if the user declines to
    replace an existing output file:

    1. query Gaia using the `gaiadr2.tycho2_best_neighbour` table;
    2. ask SIMBAD for Gaia DR2 identifiers of the ASCC TYC ids that the
       first stage did not return;
    3. upload the SIMBAD matches as the user table `tyc_missing` and query
       Gaia for them, deleting the user table afterwards.

    `username` is used to build the qualified name of the uploaded table.
    """
    gaia_csv = os.path.join('gaia', 'gaiadr2_tyc-result.csv')
    if proceed_checkfile(gaia_csv):
        download_gaia_data('tyc2_id', 'gaiadr2.tycho2_best_neighbour', gaia_csv)

    # Use SIMBAD to fill in some of the missing entries
    try:
        os.mkdir('simbad')
    except FileExistsError:
        pass

    simbad_votable = os.path.join('simbad', 'tyc-gaia.votable')
    if proceed_checkfile(simbad_votable):
        ascc_archive = os.path.join('vizier', 'ascc.tar.gz')
        unmatched = get_missing_tyc_ids(gaia_csv, ascc_archive)
        print("Querying SIMBAD for Gaia DR2 identifiers")
        simbad = Tap(url='http://simbad.u-strasbg.fr:80/simbad/sim-tap')
        simbad_query = """SELECT
id1.id tyc_id, id2.id gaia_id
FROM
TAP_UPLOAD.missing_tyc src
JOIN IDENT id1 ON id1.id = src.id
JOIN IDENT id2 ON id2.oidref = id1.oidref
WHERE
id2.id LIKE 'Gaia DR2 %'"""
        print(simbad_query)
        simbad_job = simbad.launch_job_async(simbad_query,
                                             upload_resource=unmatched,
                                             upload_table_name='missing_tyc',
                                             output_file=simbad_votable,
                                             output_format='votable',
                                             dump_to_file=True)
        simbad_job.save_results()

    extra_csv = os.path.join('gaia', 'gaiadr2_tyc-result-extra.csv')
    if not proceed_checkfile(extra_csv):
        return

    matches = votable.parse(simbad_votable).resources[0].tables[0].to_table()
    # Keep only the text after the last space of each identifier, i.e.
    # drop the catalogue prefix; the Gaia part is converted to an integer.
    matches['tyc_id'] = [name.rsplit(' ', 1)[-1]
                         for name in matches['tyc_id'].astype('U')]
    matches.rename_column('tyc_id', 'original_ext_source_id')
    matches['gaia_id'] = [int(name.rsplit(' ', 1)[-1])
                          for name in matches['gaia_id'].astype('U')]
    matches.rename_column('gaia_id', 'source_id')

    Gaia.upload_table(upload_resource=matches, table_name='tyc_missing')
    try:
        download_gaia_data('tyc2_id', f'user_{username}.tyc_missing', extra_csv)
    finally:
        Gaia.delete_user_table('tyc_missing')
def download_gaia() -> None:
    """Log in to the Gaia archive and download the HIP and TYC data.

    The user name and password are read interactively; an empty user name
    or an empty password aborts the login. The session is logged out again
    once the downloads have finished or failed.
    """
    try:
        os.mkdir('gaia')
    except FileExistsError:
        pass

    print('Login to Gaia Archive')
    username = input('Username: ')
    # The password is only requested once a user name has been entered.
    password = getpass.getpass('Password: ') if username else ''
    if not (username and password):
        print('Login aborted')
        return

    Gaia.login(user=username, password=password)
    try:
        download_gaia_hip(username)
        download_gaia_tyc(username)
    finally:
        Gaia.logout()
# --- SAO XMATCH DOWNLOAD ---
def download_xmatch(cat1: str, cat2: str, outfile_name: str) -> None:
    """Cross-match two catalogues and write the result as CSV.

    The match between `cat1` and `cat2` uses a maximum distance of
    5 arcseconds. Nothing is done if `outfile_name` already exists and the
    user declines to replace it.
    """
    if proceed_checkfile(outfile_name):
        search_radius = 5 * units.arcsec
        matches = XMatch.query(cat1=cat1, cat2=cat2, max_distance=search_radius)
        io_ascii.write(matches, outfile_name, format='csv')
def download_sao_xmatch() -> None:
    """Download the cross-matches between the SAO catalogue and HIP/TYC2.

    The results are written to CSV files in the `xmatch` directory, which
    is created if it does not exist yet.
    """
    try:
        os.mkdir('xmatch')
    except FileExistsError:
        pass

    sao = 'vizier:I/131A/sao'
    # (catalogue to match against SAO, output file name)
    targets = (
        ('vizier:I/311/hip2', 'sao_hip_xmatch.csv'),
        ('vizier:I/259/tyc2', 'sao_tyc2_xmatch.csv'),
        ('vizier:I/259/suppl_1', 'sao_tyc2_suppl1_xmatch.csv'),
        ('vizier:I/259/suppl_2', 'sao_tyc2_suppl2_xmatch.csv'),
    )
    for other, csv_name in targets:
        print(f'Downloading {sao}-{other} crossmatch')
        download_xmatch(sao, other, os.path.join('xmatch', csv_name))
# --- VIZIER DOWNLOAD ---
def download_vizier() -> None:
    """Download the catalogue archive files from VizieR.

    Every file is stored under its listed name in the `vizier` directory,
    which is created if it does not exist yet.
    """
    try:
        os.mkdir('vizier')
    except FileExistsError:
        pass

    # (local file name, download URL)
    catalogue_sources = (
        ('ascc.tar.gz', 'http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?I/280B'),
        ('hipgpma.tar.gz', 'https://cdsarc.unistra.fr/viz-bin/nph-Cat/tar.gz?J/A+A/623/A72'),
        # for some reason, the SAO archive at VizieR does not work, so download files individually
        ('sao.dat.gz', 'https://cdsarc.unistra.fr/ftp/I/131A/sao.dat.gz'),
        ('sao.readme', 'https://cdsarc.unistra.fr/ftp/I/131A/ReadMe'),
        ('tyc2hd.tar.gz', 'https://cdsarc.unistra.fr/viz-bin/nph-Cat/tar.gz?IV/25'),
        ('tyc2spec.tar.gz', 'http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?III/231'),
        ('tyc2specnew.tar.gz', 'https://cdsarc.unistra.fr/viz-bin/nph-Cat/tar.gz?J/PAZh/34/21'),
        ('tyc2teff.tar.gz', 'http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?V/136'),
        ('ubvriteff.tar.gz', 'http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?J/ApJS/193/1'),
        ('xhip.tar.gz', 'http://cdsarc.u-strasbg.fr/viz-bin/nph-Cat/tar.gz?V/137D'),
    )
    for local_name, source_url in catalogue_sources:
        target_path = os.path.join('vizier', local_name)
        download_file(target_path, source_url)
if __name__ == "__main__":
    # Run the download stages in order: the VizieR archives first, then
    # the Gaia archive data, then the SAO cross-matches.
    for stage in (download_vizier, download_gaia, download_sao_xmatch):
        stage()