Parquet schema

main
Jeff Moe 2023-10-07 17:56:47 -06:00
parent c70edc28a8
commit d8b2e61fe7
1 changed file with 15 additions and 7 deletions

View File

@@ -13,23 +13,31 @@ import pyarrow.parquet as pq
default="/srv/gbif/gbif_2023-10-01/occurrence.parquet/000000",
type=click.File('rb'),
)
@click.option(
'-t',
'--table',
is_flag=True,
)
@click.option(
'-m',
'--meta',
is_flag=True,
)
def cli(filename, meta, table):
@click.option(
'-s',
'--schema',
is_flag=True,
)
@click.option(
'-t',
'--table',
is_flag=True,
)
def cli(filename, meta, schema, table):
print("Reading", filename.name)
parquet_file = pq.ParquetFile(filename)
if meta == True:
parquet_file = pq.ParquetFile(filename)
parquet_meta = parquet_file.metadata
print(parquet_meta)
#print(parquet_meta.num_columns)
if schema == True:
parquet_schema = pq.read_schema(filename)
print(parquet_schema)
if table == True:
parquet_table = pq.read_table(filename)
print(parquet_table)