Add option to print unique licenses used by The Stack

main
Jeff Moe 2023-11-24 17:23:47 -07:00
parent 29e9146847
commit df6a972c29
1 changed file with 17 additions and 2 deletions

View File

@ -29,6 +29,12 @@ def print_records(dataframe, color):
print(row)
def print_unique_licenses(dataframe):
    """Print every distinct value of the ``license`` column, one per line.

    Args:
        dataframe: A pandas DataFrame that has a ``license`` column.

    Values are printed in the order pandas reports them from
    ``Series.unique()``, i.e. order of first appearance in the column.
    """
    license_column = dataframe["license"]
    distinct_names = license_column.unique().tolist()
    for name in distinct_names:
        print(name)
def main():
parser = argparse.ArgumentParser(
description="Specify the directory and record range to use"
@ -53,6 +59,12 @@ def main():
parser.add_argument(
"--color", "-c", action="store_true", help="Colorize the output"
)
parser.add_argument(
"--list-licenses",
"-l",
action="store_true",
help="List unique licenses in the file",
)
args = parser.parse_args()
directory = os.path.join(args.directory, "data/")
@ -76,8 +88,11 @@ def main():
else:
df = pd.read_parquet(os.path.join(directory, "lic.parquet"))
records = get_records(df, args)
print_records(records, args.color)
if args.list_licenses:
print_unique_licenses(df)
else:
records = get_records(df, args)
print_records(records, args.color)
if __name__ == "__main__":