update editorconfig, enforce via CI (#1343)
* update editorconfig to set unix-style newlines and trim whitespace
* add editorconfig github action to the CI
* fix whitespace
pull/1379/head
parent
c2b82ea8ac
commit
da2efecbe2
|
@ -1,4 +1,16 @@
|
|||
# 2 space indentation
|
||||
# Top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Unix-style newlines with a newline ending every file, utf-8 charset
|
||||
[*]
|
||||
end_of_line = lf
|
||||
trim_trailing_whitespace = true
|
||||
charset = utf-8
|
||||
|
||||
[*.py]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# ignore binary files
|
||||
[{*.hwx,*.mlmodel,*.weights,*.golden}]
|
||||
end_of_line = unset
|
||||
trim_trailing_whitespace = unset
|
||||
|
|
|
@ -0,0 +1,13 @@
|
|||
name: EditorConfig Checker
|
||||
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
editorconfig:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: editorconfig-checker/action-editorconfig-checker@main
|
||||
- run: editorconfig-checker
|
|
@ -249,7 +249,7 @@ jobs:
|
|||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
|
||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
|
||||
sudo apt update && \
|
||||
sudo apt install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl
|
||||
sudo apt install -y intel-oneapi-runtime-compilers intel-oneapi-runtime-opencl
|
||||
- name: Install packages (cuda)
|
||||
if: matrix.backend == 'cuda'
|
||||
run: |
|
||||
|
|
|
@ -9,7 +9,7 @@ if __name__ == "__main__":
|
|||
model.load_from_pretrained()
|
||||
run, special_names = jit_model(model, Tensor.randn(1,3,224,224))
|
||||
functions, statements, bufs, _bufs_to_save = compile_net(run, special_names)
|
||||
|
||||
|
||||
state = get_state_dict(model)
|
||||
weights = {id(x.lazydata.realized): name for name, x in state.items()}
|
||||
safe_save(state, path.join(path.dirname(__file__), "net.safetensors"))
|
||||
|
@ -28,7 +28,7 @@ if __name__ == "__main__":
|
|||
const getTensorBuffer = (safetensorBuffer, tensorMetadata) => {{
|
||||
return safetensorBuffer.subarray(...tensorMetadata.data_offsets);
|
||||
}}
|
||||
|
||||
|
||||
const createEmptyBuf = (device, size) => {{
|
||||
return device.createBuffer({{size, usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST }});
|
||||
}};
|
||||
|
@ -50,7 +50,7 @@ const addComputePass = (device, commandEncoder, pipeline, bufs, workgroup) => {{
|
|||
}};
|
||||
|
||||
{kernel_code}
|
||||
|
||||
|
||||
const setupNet = async (device, safetensor) => {{
|
||||
const metadata = getTensorMetadata(safetensor);
|
||||
|
||||
|
|
|
@ -56,7 +56,7 @@ canvas { display: none; }
|
|||
console.log(`${delta} ms ${label}`);
|
||||
document.getElementById('time').innerHTML = `${delta} ms ${label}`;
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
const getLabels = async () => (await fetch("https://raw.githubusercontent.com/anishathalye/imagenet-simple-labels/master/imagenet-simple-labels.json")).json();
|
||||
|
||||
|
@ -80,7 +80,7 @@ canvas { display: none; }
|
|||
const response = await fetch(resource)
|
||||
if (!response.ok) error("sir. that is not a good URL. try a new one");
|
||||
document.getElementById("imagebox").src = resource
|
||||
|
||||
|
||||
const img = new Image();
|
||||
img.crossOrigin = "Anonymous";
|
||||
img.onload = () => {
|
||||
|
|
|
@ -36,7 +36,7 @@ int main(int argc, char* argv[]) {
|
|||
CFTypeRef ivalues[2];
|
||||
ivalues[0] = CFStringCreateWithCString(kCFAllocatorDefault, argv[1], kCFStringEncodingUTF8);
|
||||
ivalues[1] = CFSTR("./");
|
||||
|
||||
|
||||
CFDictionaryRef iDictionary = CFDictionaryCreate(kCFAllocatorDefault, ikeys, ivalues, 2, &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
|
||||
CFArrayRef array = CFArrayCreate(kCFAllocatorDefault, (const void**)&iDictionary, 1, &kCFTypeArrayCallBacks);
|
||||
|
||||
|
@ -69,6 +69,5 @@ int main(int argc, char* argv[]) {
|
|||
int ret = ANECCompile(optionsDictionary, flagsDictionary, 0);
|
||||
printf("compile: %d\n", ret);
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -142,7 +142,7 @@ namespace H11ANE {
|
|||
int ANE_ForgetFirmware();
|
||||
|
||||
|
||||
private: // size is 0x88
|
||||
private: // size is 0x88
|
||||
unsigned char unknown[0x88];
|
||||
};
|
||||
|
||||
|
|
|
@ -52,13 +52,13 @@ Sadly disabling amfi breaks things like vscode. You can runtime patch
|
|||
```
|
||||
# MacOS 12.4
|
||||
|
||||
smol :: ~/fun/tinygrad » sha1sum /usr/libexec/amfid
|
||||
smol :: ~/fun/tinygrad » sha1sum /usr/libexec/amfid
|
||||
0f7e7f7e41408f83d7ebc7564a3828f41cb2ab58 /usr/libexec/amfid
|
||||
|
||||
# with patching +0x8e38
|
||||
|
||||
(lldb) image list
|
||||
[ 0] 04B6DF6C-6068-3F18-81A7-978985574387 0x0000000102ad0000 /usr/libexec/amfid
|
||||
[ 0] 04B6DF6C-6068-3F18-81A7-978985574387 0x0000000102ad0000 /usr/libexec/amfid
|
||||
(lldb) p *(unsigned int *)0x102ad8e38=0xd2800000
|
||||
```
|
||||
|
||||
|
@ -67,7 +67,7 @@ This disables the entitlement check, then you don't need a bootarg. I wish Apple
|
|||
## Extracting ANEServices.framework
|
||||
|
||||
```
|
||||
# install xcode and
|
||||
# install xcode and
|
||||
sudo xcode-select --switch /Applications/Xcode.app
|
||||
# xcode also contains ANEServices.tbd
|
||||
brew install keith/formulae/dyld-shared-cache-extractor
|
||||
|
@ -87,7 +87,7 @@ https://github.com/antgroup-arclab/ANETools.git
|
|||
* frame #0: 0x00000001c250fecc AppleNeuralEngine`-[_ANEDaemonConnection loadModel:sandboxExtension:options:qos:withReply:]
|
||||
(lldb) po $x2
|
||||
_ANEModel: { modelURL=file:///var/folders/l8/38vj8bm52_gfgsqgdn__sh2w0000gn/T/test_F48D9B88-A68D-476F-ADC8-32BDAF9A2498.mlmodelc/ : key={"isegment":0,"inputs":{"image":{"shape":[1,1,1,64,1]},"image2":{"shape":[1,1,1,64,1]}},"outputs":{"probs":{"shape":[1,1,1,64,1]}}} : string_id=0x00000000 : program=(null) : state=1 : programHandle=0 : intermediateBufferHandle=0 : queueDepth=0 : attr={
|
||||
} : perfStatsMask=0}
|
||||
} : perfStatsMask=0}
|
||||
```
|
||||
|
||||
## Choices
|
||||
|
|
|
@ -79,7 +79,7 @@ rbreak ^ZinAneInstruction*
|
|||
weeee ZinIrRegBitPrintOutDebug_7u_
|
||||
print (void)debugregs(0, 0x0000000100211030+8, 3)
|
||||
|
||||
== min.plist ==
|
||||
== min.plist ==
|
||||
|
||||
Types: GOC, Conv, Broadcast, ScaledElementWise, Reshape, InputView, Neuron, Concat
|
||||
|
||||
|
@ -323,7 +323,7 @@ zin_ane_compiler v4.2.1
|
|||
ANECCompile(__CFDictionary *param_1, __CFDictionary *param_2, unsigned long param_3)
|
||||
|
||||
param_1:
|
||||
{
|
||||
{
|
||||
InputNetworks = (
|
||||
{
|
||||
NetworkPlistName = "net.plist";
|
||||
|
|
|
@ -1,99 +1,99 @@
|
|||
// ZinIrRegBitPrintOutDebug_7u_
|
||||
|
||||
Task_ID: 0
|
||||
Task_ID: 0
|
||||
|
||||
header = 10*4 = 0x28
|
||||
|
||||
aneTD.Header[0].TID = 0
|
||||
aneTD.Header[0].NID = 0
|
||||
aneTD.Header[0].LNID = 1
|
||||
aneTD.Header[0].EON = 1
|
||||
aneTD.Header[1].ExeCycles = 0
|
||||
aneTD.Header[1].NextSize = 0
|
||||
aneTD.Header[2].LogEvents = 1058
|
||||
aneTD.Header[3].Exceptions = 0
|
||||
|
||||
aneTD.Header[0].TID = 0
|
||||
aneTD.Header[0].NID = 0
|
||||
aneTD.Header[0].LNID = 1
|
||||
aneTD.Header[0].EON = 1
|
||||
aneTD.Header[1].ExeCycles = 0
|
||||
aneTD.Header[1].NextSize = 0
|
||||
aneTD.Header[2].LogEvents = 1058
|
||||
aneTD.Header[3].Exceptions = 0
|
||||
aneTD.Header[4].DebugLogEvents = 16775274
|
||||
aneTD.Header[5].DebugExceptions = 0
|
||||
aneTD.Header[6].DisallowAbort = 0
|
||||
aneTD.Header[6].TDSkip = 0
|
||||
aneTD.Header[6].KPC = 0
|
||||
aneTD.Header[6].SPL = 0
|
||||
aneTD.Header[6].TSR = 0
|
||||
aneTD.Header[6].SPC = 0
|
||||
aneTD.Header[6].DPC = 0
|
||||
aneTD.Header[6].TSE = 0
|
||||
aneTD.Header[6].DisallowAbort = 0
|
||||
aneTD.Header[6].TDSkip = 0
|
||||
aneTD.Header[6].KPC = 0
|
||||
aneTD.Header[6].SPL = 0
|
||||
aneTD.Header[6].TSR = 0
|
||||
aneTD.Header[6].SPC = 0
|
||||
aneTD.Header[6].DPC = 0
|
||||
aneTD.Header[6].TSE = 0
|
||||
aneTD.Header[6].NextPriority = 0
|
||||
aneTD.Header[6].TDE = 0
|
||||
aneTD.Header[6].SrcLoc = 1
|
||||
aneTD.Header[6].DstLoc = 1
|
||||
aneTD.Header[6].TQDis = 0
|
||||
aneTD.Header[7].NextPointer = 0
|
||||
aneTD.Header[8].RBase0 = 5
|
||||
aneTD.Header[8].RBE0 = 1
|
||||
aneTD.Header[8].RBase1 = 0
|
||||
aneTD.Header[8].RBE1 = 0
|
||||
aneTD.Header[8].WBase = 4
|
||||
aneTD.Header[8].WBE = 1
|
||||
aneTD.Header[8].TBase = 0
|
||||
aneTD.Header[8].TBE = 0
|
||||
aneTD.Header[8].ENE = 1
|
||||
aneTD.Header[9].KBase0 = 1
|
||||
aneTD.Header[9].KBE0 = 1
|
||||
aneTD.Header[9].KBase1 = 0
|
||||
aneTD.Header[9].KBE1 = 0
|
||||
aneTD.Header[9].KBase2 = 0
|
||||
aneTD.Header[9].KBE2 = 0
|
||||
aneTD.Header[9].KBase3 = 0
|
||||
aneTD.Header[9].KBE3 = 0
|
||||
|
||||
aneTD.Header[6].TDE = 0
|
||||
aneTD.Header[6].SrcLoc = 1
|
||||
aneTD.Header[6].DstLoc = 1
|
||||
aneTD.Header[6].TQDis = 0
|
||||
aneTD.Header[7].NextPointer = 0
|
||||
aneTD.Header[8].RBase0 = 5
|
||||
aneTD.Header[8].RBE0 = 1
|
||||
aneTD.Header[8].RBase1 = 0
|
||||
aneTD.Header[8].RBE1 = 0
|
||||
aneTD.Header[8].WBase = 4
|
||||
aneTD.Header[8].WBE = 1
|
||||
aneTD.Header[8].TBase = 0
|
||||
aneTD.Header[8].TBE = 0
|
||||
aneTD.Header[8].ENE = 1
|
||||
aneTD.Header[9].KBase0 = 1
|
||||
aneTD.Header[9].KBE0 = 1
|
||||
aneTD.Header[9].KBase1 = 0
|
||||
aneTD.Header[9].KBE1 = 0
|
||||
aneTD.Header[9].KBase2 = 0
|
||||
aneTD.Header[9].KBE2 = 0
|
||||
aneTD.Header[9].KBase3 = 0
|
||||
aneTD.Header[9].KBE3 = 0
|
||||
|
||||
0x28 = 00 F8 01 F4 = 0x1F800
|
||||
+0x30
|
||||
aneRegs.KernelDMASrc.CoeffBaseAddr[0].Addr = 0
|
||||
aneRegs.KernelDMASrc.CoeffBfrSize[0].MemBfrSize = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].CrH = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].En = 1
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].PrefetchParticipateEn = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[0].PrefetchParticipateEn = 0
|
||||
aneRegs.KernelDMASrc.CoeffBaseAddr[1].Addr = 0
|
||||
aneRegs.KernelDMASrc.CoeffBfrSize[1].MemBfrSize = 1
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].CrH = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].En = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].PrefetchParticipateEn = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[1].PrefetchParticipateEn = 0
|
||||
aneRegs.KernelDMASrc.CoeffBaseAddr[2].Addr = 0
|
||||
aneRegs.KernelDMASrc.CoeffBfrSize[2].MemBfrSize = 1
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].CacheHint = 2
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].CrH = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].En = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].PrefetchParticipateEn = 0
|
||||
aneRegs.KernelDMASrc.CoeffDMAConfig[2].PrefetchParticipateEn = 0
|
||||
# there's 13 more of these
|
||||
aneRegs.KernelDMASrc.Spare0.Spare = 0
|
||||
aneRegs.KernelDMASrc.Spare1.Spare = 0
|
||||
aneRegs.KernelDMASrc.Spare1.Spare = 0
|
||||
|
||||
0x124 = 00 00 00 3C = 0
|
||||
+0x1d4
|
||||
aneRegs.Common.Cfg.AccDoubleBufEn = 1
|
||||
aneRegs.Common.Cfg.ActiveNE = 0
|
||||
aneRegs.Common.Cfg.AccDoubleBufEn = 1
|
||||
aneRegs.Common.Cfg.ActiveNE = 0
|
||||
aneRegs.Common.Cfg.ContextSwitchIn = 0
|
||||
aneRegs.Common.Cfg.ContextSwitchOut = 0
|
||||
aneRegs.Common.Cfg.ShMax = 1
|
||||
aneRegs.Common.Cfg.ShMin = 0
|
||||
aneRegs.Common.Cfg.ShPref = 1
|
||||
aneRegs.Common.Cfg.ShMax = 1
|
||||
aneRegs.Common.Cfg.ShMin = 0
|
||||
aneRegs.Common.Cfg.ShPref = 1
|
||||
aneRegs.Common.Cfg.SmallSourceMode = 0
|
||||
aneRegs.Common.ChCfg.InFmt = 2
|
||||
aneRegs.Common.ChCfg.OutFmt = 2
|
||||
aneRegs.Common.Cin.Cin = 1
|
||||
aneRegs.Common.ConvCfg.Kh = 1
|
||||
aneRegs.Common.ConvCfg.Kw = 1
|
||||
aneRegs.Common.ConvCfg.OCGSize = 0
|
||||
aneRegs.Common.ConvCfg.Ox = 1
|
||||
aneRegs.Common.ConvCfg.Oy = 1
|
||||
aneRegs.Common.ConvCfg.Px = 0
|
||||
aneRegs.Common.ConvCfg.Py = 0
|
||||
aneRegs.Common.ConvCfg.Sx = 1
|
||||
aneRegs.Common.ConvCfg.Sy = 1
|
||||
aneRegs.Common.Cout.Cout = 1
|
||||
aneRegs.Common.DPE.Category = 0
|
||||
aneRegs.Common.ChCfg.InFmt = 2
|
||||
aneRegs.Common.ChCfg.OutFmt = 2
|
||||
aneRegs.Common.Cin.Cin = 1
|
||||
aneRegs.Common.ConvCfg.Kh = 1
|
||||
aneRegs.Common.ConvCfg.Kw = 1
|
||||
aneRegs.Common.ConvCfg.OCGSize = 0
|
||||
aneRegs.Common.ConvCfg.Ox = 1
|
||||
aneRegs.Common.ConvCfg.Oy = 1
|
||||
aneRegs.Common.ConvCfg.Px = 0
|
||||
aneRegs.Common.ConvCfg.Py = 0
|
||||
aneRegs.Common.ConvCfg.Sx = 1
|
||||
aneRegs.Common.ConvCfg.Sy = 1
|
||||
aneRegs.Common.Cout.Cout = 1
|
||||
aneRegs.Common.DPE.Category = 0
|
||||
aneRegs.Common.GroupConvCfg.ElemMultMode = 0
|
||||
aneRegs.Common.GroupConvCfg.NumGroups = 1
|
||||
aneRegs.Common.GroupConvCfg.UnicastCin = 1
|
||||
|
@ -132,7 +132,7 @@ aneRegs.TileDMASrc.PixelOffset[1].Offset = 0
|
|||
aneRegs.TileDMASrc.PixelOffset[2].Offset = 0
|
||||
aneRegs.TileDMASrc.PixelOffset[3].Offset = 0
|
||||
aneRegs.TileDMASrc.PlaneStride.PlaneStride = 3
|
||||
aneRegs.TileDMASrc.RowStride.Stride = 3
|
||||
aneRegs.TileDMASrc.RowStride.Stride = 3
|
||||
aneRegs.TileDMASrc.Spare0.Spare = 0
|
||||
aneRegs.TileDMASrc.Spare1.Spare = 0
|
||||
|
||||
|
@ -145,7 +145,7 @@ aneRegs.L2.ResultCfg.AliasPlanarRslt = 0
|
|||
aneRegs.L2.ResultCfg.AliasPlanarSrc = 0
|
||||
aneRegs.L2.ResultCfg.ResultType = 2
|
||||
aneRegs.L2.ResultCfg.DMACmpVec = 0
|
||||
aneRegs.L2.ResultCfg.DMAFmt = 1
|
||||
aneRegs.L2.ResultCfg.DMAFmt = 1
|
||||
aneRegs.L2.ResultCfg.DMAInterleave = 1
|
||||
aneRegs.L2.ResultCfg.DMAOffsetCh = 0
|
||||
aneRegs.L2.ResultCfg.L2BfrMode = 1
|
||||
|
@ -173,38 +173,38 @@ aneRegs.L2.SourceRowStride.Stride = 10
|
|||
+0x2f0
|
||||
0x23C = 00 C8 00 10 = 0xC800
|
||||
+0x30c
|
||||
aneRegs.NE.AccBias.AccBias = 0
|
||||
aneRegs.NE.AccBias.AccBiasShift = 0
|
||||
aneRegs.NE.KernelCfg.GroupKernelReuse = 0
|
||||
aneRegs.NE.KernelCfg.KernelFmt = 0
|
||||
aneRegs.NE.KernelCfg.PalettizedBits = 8
|
||||
aneRegs.NE.KernelCfg.PalettizedEn = 0
|
||||
aneRegs.NE.KernelCfg.SparseFmt = 0
|
||||
aneRegs.NE.MACCfg.BiasMode = 0
|
||||
aneRegs.NE.MACCfg.BinaryPoint = 0
|
||||
aneRegs.NE.MACCfg.KernelMode = 1
|
||||
aneRegs.NE.MACCfg.MatrixBiasEn = 0
|
||||
aneRegs.NE.MACCfg.NonlinearMode = 2
|
||||
aneRegs.NE.MACCfg.OpMode = 4
|
||||
aneRegs.NE.MACCfg.PostScaleMode = 0
|
||||
aneRegs.NE.MatrixVectorBias.MatrixVectorBias = 0
|
||||
aneRegs.NE.PostScale.PostRightShift = 0
|
||||
aneRegs.NE.PostScale.PostScale = 15360
|
||||
aneRegs.NE.Spare0.Spare = 0
|
||||
aneRegs.NE.Spare1.Spare = 0
|
||||
aneRegs.NE.AccBias.AccBias = 0
|
||||
aneRegs.NE.AccBias.AccBiasShift = 0
|
||||
aneRegs.NE.KernelCfg.GroupKernelReuse = 0
|
||||
aneRegs.NE.KernelCfg.KernelFmt = 0
|
||||
aneRegs.NE.KernelCfg.PalettizedBits = 8
|
||||
aneRegs.NE.KernelCfg.PalettizedEn = 0
|
||||
aneRegs.NE.KernelCfg.SparseFmt = 0
|
||||
aneRegs.NE.MACCfg.BiasMode = 0
|
||||
aneRegs.NE.MACCfg.BinaryPoint = 0
|
||||
aneRegs.NE.MACCfg.KernelMode = 1
|
||||
aneRegs.NE.MACCfg.MatrixBiasEn = 0
|
||||
aneRegs.NE.MACCfg.NonlinearMode = 2
|
||||
aneRegs.NE.MACCfg.OpMode = 4
|
||||
aneRegs.NE.MACCfg.PostScaleMode = 0
|
||||
aneRegs.NE.MatrixVectorBias.MatrixVectorBias = 0
|
||||
aneRegs.NE.PostScale.PostRightShift = 0
|
||||
aneRegs.NE.PostScale.PostScale = 15360
|
||||
aneRegs.NE.Spare0.Spare = 0
|
||||
aneRegs.NE.Spare1.Spare = 0
|
||||
|
||||
0x254 = 00 78 01 18 = 0x17800
|
||||
+0x32c
|
||||
aneRegs.TileDMADst.BaseAddr.Addr = 0
|
||||
aneRegs.TileDMADst.BaseAddr.Addr = 0
|
||||
aneRegs.TileDMADst.DepthStride.DepthStride = 3
|
||||
aneRegs.TileDMADst.DMAConfig.BypassEOW = 0
|
||||
aneRegs.TileDMADst.DMAConfig.CacheHint = 3
|
||||
aneRegs.TileDMADst.DMAConfig.CrH = 0
|
||||
aneRegs.TileDMADst.DMAConfig.En = 1
|
||||
aneRegs.TileDMADst.DMAConfig.CrH = 0
|
||||
aneRegs.TileDMADst.DMAConfig.En = 1
|
||||
aneRegs.TileDMADst.DMAConfig.L2BfrMode = 1
|
||||
aneRegs.TileDMADst.Fmt.CmpVec = 0
|
||||
aneRegs.TileDMADst.Fmt.CmpVecFill = 0
|
||||
aneRegs.TileDMADst.Fmt.FmtMode = 1
|
||||
aneRegs.TileDMADst.Fmt.FmtMode = 1
|
||||
aneRegs.TileDMADst.Fmt.Interleave = 1
|
||||
aneRegs.TileDMADst.Fmt.MemFmt = 2
|
||||
aneRegs.TileDMADst.Fmt.OffsetCh = 0
|
||||
|
|
|
@ -214,7 +214,7 @@ if __name__ == "__main__":
|
|||
mdf = ane.pack(dd, md)
|
||||
assert(md == mdf)
|
||||
|
||||
comp = ane.compile(dat)
|
||||
comp = ane.compile(dat)
|
||||
ret = ane.run(comp, tin, tout)
|
||||
print("** after **")
|
||||
print(tind)
|
||||
|
|
|
@ -24,7 +24,7 @@ if not pathlib.Path(BASEDIR/'val2017').is_dir():
|
|||
with zipfile.ZipFile(fn, 'r') as zip_ref:
|
||||
zip_ref.extractall(BASEDIR)
|
||||
fn.unlink()
|
||||
|
||||
|
||||
|
||||
if not pathlib.Path(BASEDIR/'annotations').is_dir():
|
||||
fn = BASEDIR/'annotations_trainval2017.zip'
|
||||
|
@ -178,7 +178,7 @@ def evaluate_predictions_on_coco(json_result_file, iou_type="bbox"):
|
|||
with open(json_result_file, "r") as f:
|
||||
for line in f:
|
||||
coco_results.append(json.loads(line))
|
||||
|
||||
|
||||
coco_gt = COCO(str(BASEDIR/'annotations/instances_val2017.json'))
|
||||
set_of_json = remove_dup([json.dumps(d, cls=NpEncoder) for d in coco_results])
|
||||
unique_list = [json.loads(s) for s in set_of_json]
|
||||
|
@ -186,7 +186,7 @@ def evaluate_predictions_on_coco(json_result_file, iou_type="bbox"):
|
|||
with open(f'{json_result_file}.flattend', "w") as f:
|
||||
json.dump(unique_list, f)
|
||||
|
||||
coco_dt = coco_gt.loadRes(str(f'{json_result_file}.flattend'))
|
||||
coco_dt = coco_gt.loadRes(str(f'{json_result_file}.flattend'))
|
||||
coco_eval = COCOeval(coco_gt, coco_dt, iou_type)
|
||||
coco_eval.evaluate()
|
||||
coco_eval.accumulate()
|
||||
|
|
|
@ -56,7 +56,7 @@ bufs = [c,a,b]
|
|||
module = ir.Module(name=__file__)
|
||||
func = ir.Function(module, ir.FunctionType(ir.IntType(64), [ir.FloatType().as_pointer()]*3), name='exec')
|
||||
|
||||
# load all
|
||||
# load all
|
||||
entry = ir.IRBuilder(func.append_basic_block(name="entry"))
|
||||
zm, xm, ym = [entry.ptrtoint(func.args[i], ir.IntType(64)) for i in range(3)]
|
||||
|
||||
|
|
|
@ -135,7 +135,7 @@ int main() {
|
|||
for (int i = 0; i < 4000; i++) {
|
||||
memset(C, 0, N*N*sizeof(float));
|
||||
|
||||
#if NTHREADS != 1
|
||||
#if NTHREADS != 1
|
||||
nready = 0;
|
||||
ndone = 0;
|
||||
pthread_mutex_lock(&lock);
|
||||
|
@ -147,7 +147,7 @@ int main() {
|
|||
#endif
|
||||
|
||||
uint64_t start = nanos();
|
||||
#if NTHREADS == 1
|
||||
#if NTHREADS == 1
|
||||
matmul(0, N);
|
||||
#else
|
||||
// unlocking mutex starts threads
|
||||
|
@ -156,7 +156,7 @@ int main() {
|
|||
#endif
|
||||
uint64_t end = nanos();
|
||||
|
||||
#if NTHREADS != 1
|
||||
#if NTHREADS != 1
|
||||
for (int j = 0; j < NTHREADS; j++) {
|
||||
pthread_join(threads[j], NULL);
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ if __name__ == "__main__":
|
|||
et = time.monotonic()
|
||||
s = et-st
|
||||
print(f"{flop/s * 1e-9:.2f} GFLOP/S, {s*1e3:.2f} ms")
|
||||
|
||||
|
||||
with open("/tmp/matmul", "wb") as f:
|
||||
f.write(A.data)
|
||||
f.write(B.data)
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
#!/bin/bash -e
|
||||
/opt/intel/oneapi/compiler/latest/linux/bin-llvm/clang++ joint_matrix_bfloat16.cpp -fsycl
|
||||
SYCL_PI_TRACE=1 ./a.out
|
||||
SYCL_PI_TRACE=1 ./a.out
|
||||
|
|
|
@ -23,7 +23,7 @@ class TestImage(unittest.TestCase):
|
|||
t1 = t1.sum()
|
||||
t1.realize()
|
||||
assert t1.numpy() == 16*4*4*4, f"got {t1.numpy()}"
|
||||
|
||||
|
||||
def test_add_image(self):
|
||||
t1 = Tensor.ones(16, 16, 1).reshape(16, 4, 4) + 3
|
||||
t2 = Tensor.ones(16, 16, 1).reshape(16, 4, 4) + 4
|
||||
|
@ -38,7 +38,7 @@ class TestImage(unittest.TestCase):
|
|||
tiny_conv = Conv2d(in_chans, out_chans, 3, bias=None, padding=1)
|
||||
tiny_dat = Tensor.ones(bs, 12, 64, 128)
|
||||
tiny_conv(tiny_dat).realize()
|
||||
|
||||
|
||||
def test_op_conv(self):
|
||||
bs, in_chans, out_chans = 1,12,32
|
||||
tiny_conv = Conv2d(in_chans, out_chans, 3, bias=None, padding=1)
|
||||
|
|
|
@ -366,7 +366,7 @@ class TestOpt(unittest.TestCase):
|
|||
a = Tensor.ones(n, m).sum(axis).reshape(n, 1).expand(n, m).sum(axis)
|
||||
a.realize()
|
||||
cache_len = len(GlobalCounters.cache)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
return cache_len
|
||||
|
||||
def test_expand_reduce_is_folded_on_same_axis(self):
|
||||
|
@ -377,9 +377,9 @@ class TestOpt(unittest.TestCase):
|
|||
a = Tensor.ones(n, n).sum(axis).reshape(n, 1).expand(n, n).sum(axis)
|
||||
a.realize()
|
||||
cache_len = len(GlobalCounters.cache)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
return cache_len
|
||||
|
||||
|
||||
def test_expand_reduce_is_not_folded_on_different_axes(self):
|
||||
axis1, axis2 = 0, 1
|
||||
for n in [4, 8, 16]:
|
||||
|
@ -388,7 +388,7 @@ class TestOpt(unittest.TestCase):
|
|||
a = Tensor.ones(n, n).sum(axis1).reshape(n, 1).expand(n, n).sum(axis2)
|
||||
a.realize()
|
||||
cache_len = len(GlobalCounters.cache)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
np.testing.assert_allclose(a.numpy(), b.numpy(), rtol=1e-3, atol=1e-5)
|
||||
return cache_len
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -13,22 +13,22 @@ class TestYOLOv8(unittest.TestCase):
|
|||
for variant in ['n', 's', 'm', 'l', 'x']:
|
||||
weights_location = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.safetensors'
|
||||
download_file(f'https://gitlab.com/r3sist/yolov8_weights/-/raw/master/yolov8{variant}.safetensors', weights_location)
|
||||
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
state_dict = safe_load(weights_location)
|
||||
load_state_dict(TinyYolov8, state_dict)
|
||||
print(f'successfully loaded weights for yolov{variant}')
|
||||
|
||||
|
||||
def test_predictions(self):
|
||||
test_image_urls = ['https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg', 'https://www.aljazeera.com/wp-content/uploads/2022/10/2022-04-28T192650Z_1186456067_UP1EI4S1I0P14_RTRMADP_3_SOCCER-ENGLAND-MUN-CHE-REPORT.jpg']
|
||||
variant = 'n'
|
||||
weights_location = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.safetensors'
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
state_dict = safe_load(weights_location)
|
||||
load_state_dict(TinyYolov8, state_dict)
|
||||
|
||||
|
||||
for i in range(len(test_image_urls)):
|
||||
img_stream = io.BytesIO(fetch(test_image_urls[i]))
|
||||
img = cv2.imdecode(np.frombuffer(img_stream.read(), np.uint8), 1)
|
||||
|
@ -37,41 +37,40 @@ class TestYOLOv8(unittest.TestCase):
|
|||
post_predictions = postprocess(preds=predictions, img=test_image, orig_imgs=[img])
|
||||
labels = label_predictions(post_predictions)
|
||||
assert labels == {5: 1, 0: 4, 11: 1} if i == 0 else labels == {0: 13, 29: 1, 32: 1}
|
||||
|
||||
|
||||
def test_forward_pass_torch_onnx(self):
|
||||
variant = 'n'
|
||||
weights_location_onnx = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.onnx'
|
||||
weights_location_pt = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.pt'
|
||||
weights_location = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.safetensors'
|
||||
weights_location_onnx = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.onnx'
|
||||
weights_location_pt = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.pt'
|
||||
weights_location = Path(__file__).parent.parent.parent / "weights" / f'yolov8{variant}.safetensors'
|
||||
|
||||
download_file(f'https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8{variant}.pt', weights_location_pt)
|
||||
# the ultralytics export prints a lot of unnecessary things
|
||||
if not os.path.isfile(weights_location_onnx):
|
||||
model = ultralytics.YOLO(model=weights_location_pt, task='Detect')
|
||||
model.export(format="onnx",imgsz=[640, 480])
|
||||
model = ultralytics.YOLO(model=weights_location_pt, task='Detect')
|
||||
model.export(format="onnx",imgsz=[640, 480])
|
||||
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
depth, width, ratio = get_variant_multiples(variant)
|
||||
TinyYolov8 = YOLOv8(w=width, r=ratio, d=depth, num_classes=80)
|
||||
state_dict = safe_load(weights_location)
|
||||
load_state_dict(TinyYolov8, state_dict)
|
||||
|
||||
|
||||
image_location = [np.frombuffer(io.BytesIO(fetch('https://raw.githubusercontent.com/ultralytics/yolov5/master/data/images/bus.jpg')).read(), np.uint8)]
|
||||
orig_image = [cv2.imdecode(image_location[0], 1)]
|
||||
|
||||
|
||||
input_image = preprocess(orig_image)
|
||||
|
||||
|
||||
onnx_session = ort.InferenceSession(weights_location_onnx)
|
||||
onnx_input_name = onnx_session.get_inputs()[0].name
|
||||
onnx_output_name = onnx_session.get_outputs()[0].name
|
||||
onnx_output = onnx_session.run([onnx_output_name], {onnx_input_name: input_image.cpu().numpy()})
|
||||
|
||||
tiny_output = TinyYolov8(input_image)
|
||||
|
||||
# currently rtol is 0.025 because there is a 1-2% difference in our predictions
|
||||
# because of the zero padding in SPPF module (line 280) maxpooling layers rather than the -infinity in torch.
|
||||
# This difference does not make a difference "visually".
|
||||
|
||||
# currently rtol is 0.025 because there is a 1-2% difference in our predictions
|
||||
# because of the zero padding in SPPF module (line 280) maxpooling layers rather than the -infinity in torch.
|
||||
# This difference does not make a difference "visually".
|
||||
np.testing.assert_allclose(onnx_output[0], tiny_output.cpu().numpy(), atol=5e-4, rtol=0.025)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
@ -33,7 +33,7 @@ class TestBatchnorm(unittest.TestCase):
|
|||
return self.c2(self.c(x)).relu()
|
||||
lm = LilModel()
|
||||
model_step(lm)
|
||||
|
||||
|
||||
def test_two_conv_bn(self):
|
||||
class LilModel:
|
||||
def __init__(self):
|
||||
|
|
|
@ -6,7 +6,7 @@ from unittest.mock import patch, MagicMock
|
|||
|
||||
import torch
|
||||
import numpy as np
|
||||
from tinygrad.helpers import getenv
|
||||
from tinygrad.helpers import getenv
|
||||
from extra.utils import fetch, temp, download_file
|
||||
from tinygrad.state import torch_load
|
||||
from PIL import Image
|
||||
|
@ -33,7 +33,7 @@ class TestFetchRelative(unittest.TestCase):
|
|||
os.chdir(self.tempdir.name)
|
||||
with open('test_file.txt', 'x') as f:
|
||||
f.write("12345")
|
||||
|
||||
|
||||
def tearDown(self):
|
||||
os.chdir(self.working_dir)
|
||||
self.tempdir.cleanup()
|
||||
|
@ -41,7 +41,7 @@ class TestFetchRelative(unittest.TestCase):
|
|||
#test ./
|
||||
def test_fetch_relative_dotslash(self):
|
||||
self.assertEqual(b'12345', fetch("./test_file.txt"))
|
||||
|
||||
|
||||
#test ../
|
||||
def test_fetch_relative_dotdotslash(self):
|
||||
os.mkdir('test_file_path')
|
||||
|
@ -92,7 +92,7 @@ class TestUtils(unittest.TestCase):
|
|||
)
|
||||
if isfloat16: model = model.half()
|
||||
|
||||
path = temp(f"test_load_{isfloat16}.pt")
|
||||
path = temp(f"test_load_{isfloat16}.pt")
|
||||
torch.save(model.state_dict(), path)
|
||||
model2 = torch_load(path)
|
||||
|
||||
|
@ -102,5 +102,6 @@ class TestUtils(unittest.TestCase):
|
|||
assert a.shape == b.shape
|
||||
assert a.dtype == b.dtype
|
||||
assert np.array_equal(a, b)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
unittest.main()
|
||||
|
|
|
@ -10,7 +10,7 @@ function cleanup(err) {
|
|||
res.kill();
|
||||
if(err != null) {
|
||||
console.error(err);
|
||||
process.exit(1);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,7 @@ async function waitForText(selector, text) {
|
|||
ready = true;
|
||||
break
|
||||
}
|
||||
await timeout(2000);
|
||||
await timeout(2000);
|
||||
n += 1
|
||||
}
|
||||
return ready;
|
||||
|
|
|
@ -11,7 +11,7 @@ METAL_XCODE = getenv("METAL_XCODE")
|
|||
class _METAL:
|
||||
def __init__(self):
|
||||
self.device = Metal.MTLCreateSystemDefaultDevice()
|
||||
self.dispatch_group = libdispatch.dispatch_group_create()
|
||||
self.dispatch_group = libdispatch.dispatch_group_create()
|
||||
self.mtl_queue = self.device.newCommandQueue()
|
||||
def command_buffer(self):
|
||||
command_buffer = self.mtl_queue.commandBuffer()
|
||||
|
|
Loading…
Reference in New Issue