1
0
Fork 0

add line changes diff bot to CI (#1863)

pull/1870/head
Yixiang Gao 2023-09-15 15:29:58 -05:00 committed by GitHub
parent 29ac8293d7
commit 789c84a7a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 102 additions and 11 deletions

51
.github/workflows/szdiff.yml vendored 100644
View File

@ -0,0 +1,51 @@
# Posts a sticky pull-request comment showing how the PR changes the line and
# token counts of the core library, as computed by sz.py.
name: Check Line Counts
on:
  pull_request:
# Cancel the workflow in progress if a newer build is about to start.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true
jobs:
  szdiff:
    name: Core Library Line Difference
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code from pr
        uses: actions/checkout@v4
        with:
          # Pin the exact commit that triggered the run instead of the branch
          # name, which may have moved since the event was created.
          ref: ${{ github.event.pull_request.head.sha }}
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          path: pr
          # sz.py from the PR is executed below; do not leave the token in
          # the checkout's git config where that code could read it.
          persist-credentials: false
      - name: Checkout code from base
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.base.sha }}
          path: base
          persist-credentials: false
      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Count Lines Of Code
        run: |
          pip install tabulate
          BASE="$GITHUB_WORKSPACE/base"
          PR="$GITHUB_WORKSPACE/pr"
          cp "$PR/sz.py" .
          # Use an unpredictable heredoc delimiter so that script output can
          # never terminate the value early and inject extra variables.
          DELIMITER="LOC_$(openssl rand -hex 16)"
          echo "loc_content<<$DELIMITER" >> "$GITHUB_ENV"
          python sz.py "$BASE" "$PR" >> "$GITHUB_ENV"
          echo "$DELIMITER" >> "$GITHUB_ENV"
      - name: Comment Code Lines
        continue-on-error: false
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          header: LOC
          # sz.py prints nothing when no file changed; skip the comment then.
          ignore_empty: true
          skip_unchanged: true
          recreate: true
          message: ${{ env.loc_content }}

62
sz.py
View File

@ -1,6 +1,5 @@
#!/usr/bin/env python3
import os
from pathlib import Path
import os, sys
import token
import tokenize
import itertools
@ -8,21 +7,62 @@ from tabulate import tabulate
# Token types that are counted by the statistics; tokens of any other type are filtered out.
TOKEN_WHITELIST = [token.OP, token.NAME, token.NUMBER, token.STRING]
if __name__ == "__main__":
headers = ["Name", "Lines", "Tokens/Line"]
# Build a table with one [name, line_count, tokens_per_line] row for every ".py" file found while walking the tree.
def gen_stats(base_path="."):
table = []
for path, subdirs, files in os.walk("tinygrad"):
for path, _, files in os.walk(os.path.join(base_path, "tinygrad")):
for name in files:
if not name.endswith(".py"): continue
filepath = Path(path) / name
filepath = os.path.join(path, name)
# Path relative to base_path, so the same file gets the same name whichever checkout it was read from.
relfilepath = os.path.relpath(filepath, base_path)
with tokenize.open(filepath) as file_:
# Keep only the whitelisted token types.
tokens = [t for t in tokenize.generate_tokens(file_.readline) if t.type in TOKEN_WHITELIST]
# line_count is the number of distinct source lines on which a kept token starts.
token_count, line_count = len(tokens), len(set([t.start[0] for t in tokens]))
# NOTE(review): a file with no whitelisted tokens gives line_count == 0, so the division below raises ZeroDivisionError.
table.append([filepath.as_posix(), line_count, token_count/line_count])
table.append([relfilepath, line_count, token_count/line_count])
return table
print(tabulate([headers] + sorted(table, key=lambda x: -x[1]), headers="firstrow", floatfmt=".1f")+"\n")
def gen_diff(table_old, table_new):
  """Compare two statistics tables and return one row per file that differs.

  Args:
    table_old: rows of ``[name, line_count, tokens_per_line]`` for the base tree.
    table_new: rows of the same shape for the changed tree.

  Returns:
    A list of ``[name, lines, lines_diff, tokens_per_line, tokens_per_line_diff]``
    rows. Added files come first, then deleted files (reported with 0 lines and
    negative diffs), then files present in both whose line count or
    tokens-per-line value changed. Each group is ordered by file name so the
    result does not depend on set iteration order. Files that are identical in
    both tables are omitted.
  """
  def index_by_name(rows):
    # Key rows by file name for O(1) lookup; the first row wins on duplicates.
    indexed = {}
    for row in rows:
      indexed.setdefault(row[0], row)
    return indexed

  old, new = index_by_name(table_old), index_by_name(table_new)
  table = []

  # Files only in the new table: the diff equals the full new value.
  for name in sorted(new.keys() - old.keys()):
    lines, tokens_per_line = new[name][1], new[name][2]
    table.append([name, lines, lines - 0, tokens_per_line, tokens_per_line - 0])

  # Files only in the old table: current values are 0, diffs are negative.
  for name in sorted(old.keys() - new.keys()):
    lines, tokens_per_line = old[name][1], old[name][2]
    table.append([name, 0, 0 - lines, 0, 0 - tokens_per_line])

  # Files in both tables: report only those whose statistics moved.
  for name in sorted(new.keys() & old.keys()):
    lines_diff = new[name][1] - old[name][1]
    tokens_diff = new[name][2] - old[name][2]
    if lines_diff != 0 or tokens_diff != 0:
      table.append([name, new[name][1], lines_diff, new[name][2], tokens_diff])
  return table
for dir_name, group in itertools.groupby(sorted([(x[0].rsplit("/", 1)[0], x[1]) for x in table]), key=lambda x:x[0]):
print(f"{dir_name:30s} : {sum([x[1] for x in group]):6d}")
def display_diff(diff):
  """Render a numeric difference as text, prefixing strictly positive values with "+"."""
  text = str(diff)
  if diff > 0:
    return "+" + text
  return text
print(f"\ntotal line count: {sum([x[1] for x in table])}")
if __name__ == "__main__":
  # Two arguments: print a diff report between the two given trees.
  # One argument: print statistics for that tree. No argument: use ".".
  argc = len(sys.argv)
  diff_mode = argc == 3
  if diff_mode:
    headers = ["Name", "Lines", "Diff", "Tokens/Line", "Diff"]
    table = gen_diff(gen_stats(sys.argv[1]), gen_stats(sys.argv[2]))
  else:
    headers = ["Name", "Lines", "Tokens/Line"]
    table = gen_stats(sys.argv[1] if argc == 2 else ".")

  # Nothing is printed at all when the table is empty.
  if table:
    by_lines_desc = lambda row: -row[1]
    if diff_mode:
      # Markdown heading plus a fenced block around the table.
      print("### Changes")
      print("```")
      rows = [headers] + sorted(table, key=by_lines_desc)
      print(tabulate(rows, headers="firstrow", intfmt=(..., "d", "+d"), floatfmt=(..., ..., ..., ".1f", "+.1f"))+"\n")
      total_delta = sum([row[2] for row in table])
      print(f"\ntotal lines changes: {display_diff(total_delta)}")
      print("```")
    else:
      rows = [headers] + sorted(table, key=by_lines_desc)
      print(tabulate(rows, headers="firstrow", floatfmt=".1f")+"\n")
      # Per-directory totals: group rows by everything before the last "/".
      per_file = sorted([(row[0].rsplit("/", 1)[0], row[1], row[2]) for row in table])
      for dir_name, group in itertools.groupby(per_file, key=lambda entry: entry[0]):
        dir_lines = sum([entry[1] for entry in group])
        print(f"{dir_name:30s} : {dir_lines:6d}")
      print(f"\ntotal line count: {sum([row[1] for row in table])}")