
more tinygrad examples

main
Jeff Moe 2024-02-06 11:17:35 -07:00
parent b4bb879ad5
commit 22d12bbad6
14 changed files with 1525 additions and 1 deletion

View File

@@ -0,0 +1,242 @@
2024-02-06 11:09:02.997488: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-06 11:09:03.035957: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-06 11:09:03.036006: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-06 11:09:03.036946: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-06 11:09:03.042522: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-02-06 11:09:03.042694: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-06 11:09:03.830560: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
2024-02-06 11:09:05.325306: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-02-06 11:09:05.325453: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-02-06 11:09:05.420786: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0
2024-02-06 11:09:05.421124: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
tinygrad: [0.270704448223114, 0.6882184743881226, 0.8074522614479065, 0.5307921767234802]
compiled: [0.270704, 0.688218, 0.807452, 0.530792]
keras: [0.2707044 0.6882185 0.8074523 0.5307921]
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define max(x,y) ((x>y)?x:y)
#define int64 long
#define half __fp16
#define uchar unsigned char
#include <stdbool.h>
float buf_0[64];
float input0[128];
float buf_1[2048];
float buf_2[64];
float buf_3[128];
float buf_4[2048];
float output0[16];
float buf_5[512];
void r_16_32(float* restrict data0, const float* restrict data1, const float* restrict data2, const float* restrict data3) {
float val0 = data1[0];
float val1 = data1[1];
float val2 = data1[2];
float val3 = data1[3];
float val4 = data1[4];
float val5 = data1[5];
float val6 = data1[6];
float val7 = data1[7];
float val8 = data1[8];
float val9 = data1[9];
float val10 = data1[10];
float val11 = data1[11];
float val12 = data1[12];
float val13 = data1[13];
float val14 = data1[14];
float val15 = data1[15];
float val16 = data1[16];
float val17 = data1[17];
float val18 = data1[18];
float val19 = data1[19];
float val20 = data1[20];
float val21 = data1[21];
float val22 = data1[22];
float val23 = data1[23];
float val24 = data1[24];
float val25 = data1[25];
float val26 = data1[26];
float val27 = data1[27];
float val28 = data1[28];
float val29 = data1[29];
float val30 = data1[30];
float val31 = data1[31];
for (int ridx0 = 0; ridx0 < 16; ridx0++) {
float acc0 = 0.0f;
float val32 = data2[ridx0];
float val33 = data2[ridx0+16];
float val34 = data2[ridx0+32];
float val35 = data2[ridx0+48];
float val36 = data2[ridx0+64];
float val37 = data2[ridx0+80];
float val38 = data2[ridx0+96];
float val39 = data2[ridx0+112];
float val40 = data2[ridx0+128];
float val41 = data2[ridx0+144];
float val42 = data2[ridx0+160];
float val43 = data2[ridx0+176];
float val44 = data2[ridx0+192];
float val45 = data2[ridx0+208];
float val46 = data2[ridx0+224];
float val47 = data2[ridx0+240];
float val48 = data2[ridx0+256];
float val49 = data2[ridx0+272];
float val50 = data2[ridx0+288];
float val51 = data2[ridx0+304];
float val52 = data2[ridx0+320];
float val53 = data2[ridx0+336];
float val54 = data2[ridx0+352];
float val55 = data2[ridx0+368];
float val56 = data2[ridx0+384];
float val57 = data2[ridx0+400];
float val58 = data2[ridx0+416];
float val59 = data2[ridx0+432];
float val60 = data2[ridx0+448];
float val61 = data2[ridx0+464];
float val62 = data2[ridx0+480];
float val63 = data2[ridx0+496];
float val64 = data3[ridx0];
float alu0 = max(((val31*val63)+((val30*val62)+((val29*val61)+((val28*val60)+((val27*val59)+((val26*val58)+((val25*val57)+((val24*val56)+((val23*val55)+((val22*val54)+((val21*val53)+((val20*val52)+((val19*val51)+((val18*val50)+((val17*val49)+((val16*val48)+((val15*val47)+((val14*val46)+((val13*val45)+((val12*val44)+((val11*val43)+((val10*val42)+((val9*val41)+((val8*val40)+((val7*val39)+((val6*val38)+((val5*val37)+((val4*val36)+((val3*val35)+((val2*val34)+((val1*val33)+((val0*val32)+acc0)))))))))))))))))))))))))))))))),0.0f);
data0[ridx0] = (alu0*val64);
}
}
void r_32_16(float* restrict data0, const float* restrict data1, const float* restrict data2) {
float val0 = data1[0];
float val1 = data1[1];
float val2 = data1[2];
float val3 = data1[3];
float val4 = data1[4];
float val5 = data1[5];
float val6 = data1[6];
float val7 = data1[7];
float val8 = data1[8];
float val9 = data1[9];
float val10 = data1[10];
float val11 = data1[11];
float val12 = data1[12];
float val13 = data1[13];
float val14 = data1[14];
float val15 = data1[15];
for (int ridx0 = 0; ridx0 < 32; ridx0++) {
float acc0 = 0.0f;
float val16 = data2[ridx0];
float val17 = data2[ridx0+32];
float val18 = data2[ridx0+64];
float val19 = data2[ridx0+96];
float val20 = data2[ridx0+128];
float val21 = data2[ridx0+160];
float val22 = data2[ridx0+192];
float val23 = data2[ridx0+224];
float val24 = data2[ridx0+256];
float val25 = data2[ridx0+288];
float val26 = data2[ridx0+320];
float val27 = data2[ridx0+352];
float val28 = data2[ridx0+384];
float val29 = data2[ridx0+416];
float val30 = data2[ridx0+448];
float val31 = data2[ridx0+480];
float alu0 = max(((val15*val31)+((val14*val30)+((val13*val29)+((val12*val28)+((val11*val27)+((val10*val26)+((val9*val25)+((val8*val24)+((val7*val23)+((val6*val22)+((val5*val21)+((val4*val20)+((val3*val19)+((val2*val18)+((val1*val17)+((val0*val16)+acc0)))))))))))))))),0.0f);
data0[ridx0] = alu0;
}
}
void r_4_32(float* restrict data0, const float* restrict data1, const float* restrict data2) {
float val0 = data1[0];
float val1 = data1[1];
float val2 = data1[2];
float val3 = data1[3];
float val4 = data1[4];
float val5 = data1[5];
float val6 = data1[6];
float val7 = data1[7];
float val8 = data1[8];
float val9 = data1[9];
float val10 = data1[10];
float val11 = data1[11];
float val12 = data1[12];
float val13 = data1[13];
float val14 = data1[14];
float val15 = data1[15];
float val16 = data1[16];
float val17 = data1[17];
float val18 = data1[18];
float val19 = data1[19];
float val20 = data1[20];
float val21 = data1[21];
float val22 = data1[22];
float val23 = data1[23];
float val24 = data1[24];
float val25 = data1[25];
float val26 = data1[26];
float val27 = data1[27];
float val28 = data1[28];
float val29 = data1[29];
float val30 = data1[30];
float val31 = data1[31];
for (int ridx0 = 0; ridx0 < 4; ridx0++) {
float acc0 = 0.0f;
float val32 = data2[ridx0];
float val33 = data2[ridx0+4];
float val34 = data2[ridx0+8];
float val35 = data2[ridx0+12];
float val36 = data2[ridx0+16];
float val37 = data2[ridx0+20];
float val38 = data2[ridx0+24];
float val39 = data2[ridx0+28];
float val40 = data2[ridx0+32];
float val41 = data2[ridx0+36];
float val42 = data2[ridx0+40];
float val43 = data2[ridx0+44];
float val44 = data2[ridx0+48];
float val45 = data2[ridx0+52];
float val46 = data2[ridx0+56];
float val47 = data2[ridx0+60];
float val48 = data2[ridx0+64];
float val49 = data2[ridx0+68];
float val50 = data2[ridx0+72];
float val51 = data2[ridx0+76];
float val52 = data2[ridx0+80];
float val53 = data2[ridx0+84];
float val54 = data2[ridx0+88];
float val55 = data2[ridx0+92];
float val56 = data2[ridx0+96];
float val57 = data2[ridx0+100];
float val58 = data2[ridx0+104];
float val59 = data2[ridx0+108];
float val60 = data2[ridx0+112];
float val61 = data2[ridx0+116];
float val62 = data2[ridx0+120];
float val63 = data2[ridx0+124];
data0[ridx0] = (1.0f/(1.0f+exp2((((val31*val63)+((val30*val62)+((val29*val61)+((val28*val60)+((val27*val59)+((val26*val58)+((val25*val57)+((val24*val56)+((val23*val55)+((val22*val54)+((val21*val53)+((val20*val52)+((val19*val51)+((val18*val50)+((val17*val49)+((val16*val48)+((val15*val47)+((val14*val46)+((val13*val45)+((val12*val44)+((val11*val43)+((val10*val42)+((val9*val41)+((val8*val40)+((val7*val39)+((val6*val38)+((val5*val37)+((val4*val36)+((val3*val35)+((val2*val34)+((val1*val33)+((val0*val32)+acc0))))))))))))))))))))))))))))))))*(-1.4426950408889634f)))));
}
}
void net(float* input0, float* output0) {
r_16_32(buf_0, input0, buf_1, buf_2);
r_32_16(buf_3, buf_0, buf_4);
r_4_32(output0, buf_3, buf_5);
}
void initialize(float *weights) {
memcpy(buf_1, weights + 0, 8192);
memcpy(buf_2, weights + 512, 256);
memcpy(buf_4, weights + 528, 8192);
memcpy(buf_5, weights + 1040, 2048);
}
int main(int argc, char *argv[]) {
// read in the weights from disk
FILE *f = fopen("/tmp/tf_weights", "rb");
float *weights = (float *)malloc(4672);
fread(weights, 1, 4672, f);
fclose(f);
// init the net
initialize(weights);
// test run
float input[32];
float outputs[4];
for (int i = 0; i < 32; i++) scanf("%f", &input[i]);
net(input, outputs);
printf("%f %f %f %f\n", outputs[0], outputs[1], outputs[2], outputs[3]);
}
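A note on the exported program above: the last layer in r_4_32 evaluates the sigmoid through exp2 rather than exp, and the constant -1.4426950408889634f is -log2(e). A minimal Python sketch of that identity (illustrative only, not part of the generated file):

import math

def sigmoid_via_exp2(x: float) -> float:
    # matches the C expression 1.0f/(1.0f+exp2(dot*(-1.4426950408889634f)))
    # since 2**(-x*log2(e)) == e**(-x)
    return 1.0 / (1.0 + 2.0 ** (x * -1.4426950408889634))

def sigmoid(x: float) -> float:
    return 1.0 / (1.0 + math.exp(-x))

assert abs(sigmoid_via_exp2(0.7) - sigmoid(0.7)) < 1e-12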

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,2 @@
281 8.961814 tabby, tabby cat
did inference in 1105.86 ms
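The two lines above are the example's summary output: the top-1 ImageNet class index, its score, and the class label, followed by the wall-clock inference time. A hedged sketch of how such a line can be formed from a vector of class scores (the function and variable names are illustrative, not the example's own code):

import numpy as np

def top1_line(scores: np.ndarray, labels: list[str]) -> str:
    # class 281 in the ImageNet-1k label set is "tabby, tabby cat"
    idx = int(np.argmax(scores))
    return f"{idx} {float(scores[idx]):f} {labels[idx]}"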

View File

@@ -0,0 +1,5 @@
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
0. 0. 0. 0.]

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,61 @@
optimizing for GPU
*** 2.00 ms : kernel 0 r_64_8_7_7_2_16_4_3_7_4_4_7 [49, 8, 64] [4, 16, 2] takes 2.00 ms, 7772 GFLOPS
*** 2.31 ms : kernel 1 r_2048_7_7_2_8_8_3_3 [7, 7, 2048] [8, 8, 2] takes 0.31 ms, 370 GFLOPS
*** 2.49 ms : kernel 2 r_64_2_49_8_16_16_4_4_4 [49, 2, 64] [16, 8] takes 0.18 ms, 9548 GFLOPS
*** 3.46 ms : kernel 3 r_64_2_7_7_8_8_2_64_4_4_3_3 [49, 2, 64] [2, 8, 8] takes 0.96 ms, 15461 GFLOPS
*** 4.09 ms : kernel 4 r_64_8_49_8_16_16_4_4_4 [49, 8, 64] [16, 8] takes 0.64 ms, 10300 GFLOPS
*** 4.79 ms : kernel 5 r_64_8_49_8_16_16_4_4_4n1 [49, 8, 64] [16, 8] takes 0.69 ms, 10432 GFLOPS
*** 5.40 ms : kernel 6 r_64_2_49_8_16_64_4_4_4 [49, 2, 64] [16, 8] takes 0.61 ms, 10976 GFLOPS
*** 6.36 ms : kernel 7 r_64_2_7_7_8_8_2_64_4_4_3_3n1 [49, 2, 64] [2, 8, 8] takes 0.96 ms, 15461 GFLOPS
*** 7.04 ms : kernel 8 r_64_8_49_8_16_16_4_4_4n2 [49, 8, 64] [16, 8] takes 0.68 ms, 10321 GFLOPS
*** 7.65 ms : kernel 9 r_64_2_49_8_16_64_4_4_4n1 [49, 2, 64] [16, 8] takes 0.61 ms, 10976 GFLOPS
*** 8.62 ms : kernel 10 r_64_2_7_7_8_8_2_64_4_4_3_3n2 [49, 2, 64] [2, 8, 8] takes 0.96 ms, 15461 GFLOPS
*** 9.30 ms : kernel 11 r_64_8_49_8_16_16_4_4_4n3 [49, 8, 64] [16, 8] takes 0.68 ms, 10321 GFLOPS
*** 10.15 ms : kernel 12 r_64_4_49_8_16_64_4_4_4 [49, 4, 64] [16, 8] takes 0.85 ms, 15668 GFLOPS
*** 11.66 ms : kernel 13 r_32_2_7_7_2_16_4_128_4_4_3_3 [49, 2, 32] [4, 16, 2] takes 1.51 ms, 9851 GFLOPS
*** 13.29 ms : kernel 14 r_32_8_7_7_2_16_4_64_4_4_4 [49, 8, 32] [4, 16, 2] takes 1.63 ms, 8048 GFLOPS
*** 14.12 ms : kernel 15 r_32_8_49_2_16_4_32_4_4_4 [49, 8, 32] [4, 16, 2] takes 0.83 ms, 8304 GFLOPS
*** 15.00 ms : kernel 16 r_32_2_49_2_16_4_128_4_4_4 [49, 2, 32] [4, 16, 2] takes 0.88 ms, 7567 GFLOPS
*** 16.28 ms : kernel 17 r_32_2_7_7_2_16_4_128_4_4_3_3n1 [49, 2, 32] [4, 16, 2] takes 1.28 ms, 11633 GFLOPS
*** 17.11 ms : kernel 18 r_32_8_49_2_16_4_32_4_4_4n1 [49, 8, 32] [4, 16, 2] takes 0.83 ms, 8156 GFLOPS
*** 17.99 ms : kernel 19 r_32_2_49_2_16_4_128_4_4_4n1 [49, 2, 32] [4, 16, 2] takes 0.88 ms, 7567 GFLOPS
*** 19.26 ms : kernel 20 r_32_2_7_7_2_16_4_128_4_4_3_3n2 [49, 2, 32] [4, 16, 2] takes 1.28 ms, 11633 GFLOPS
*** 20.10 ms : kernel 21 r_32_8_49_2_16_4_32_4_4_4n2 [49, 8, 32] [4, 16, 2] takes 0.83 ms, 8156 GFLOPS
*** 20.97 ms : kernel 22 r_32_2_49_2_16_4_128_4_4_4n2 [49, 2, 32] [4, 16, 2] takes 0.88 ms, 7567 GFLOPS
*** 22.25 ms : kernel 23 r_32_2_7_7_2_16_4_128_4_4_3_3n3 [49, 2, 32] [4, 16, 2] takes 1.28 ms, 11633 GFLOPS
*** 23.09 ms : kernel 24 r_32_8_49_2_16_4_32_4_4_4n3 [49, 8, 32] [4, 16, 2] takes 0.83 ms, 8156 GFLOPS
*** 24.42 ms : kernel 25 r_32_4_49_2_16_4_128_4_4_4 [49, 4, 32] [4, 16, 2] takes 1.33 ms, 9942 GFLOPS
*** 26.54 ms : kernel 26 r_16_4_7_7_16_2_2_256_4_4_3_3 [49, 4, 16] [2, 2, 16] takes 2.12 ms, 6986 GFLOPS
*** 29.37 ms : kernel 27 r_16_16_7_7_16_2_2_128_4_4_4 [49, 16, 16] [2, 2, 16] takes 2.83 ms, 4642 GFLOPS
*** 30.70 ms : kernel 28 r_8_16_49_8_16_64_4_4_4 [49, 16, 8] [16, 8] takes 1.32 ms, 5090 GFLOPS
*** 32.19 ms : kernel 29 r_8_4_49_8_16_256_4_4_4 [49, 4, 8] [16, 8] takes 1.49 ms, 4426 GFLOPS
*** 33.73 ms : kernel 30 r_16_4_7_7_16_2_2_256_4_4_3_3n1 [49, 4, 16] [2, 2, 16] takes 1.54 ms, 9654 GFLOPS
*** 35.10 ms : kernel 31 r_8_16_49_8_16_64_4_4_4n1 [49, 16, 8] [16, 8] takes 1.38 ms, 4860 GFLOPS
*** 36.59 ms : kernel 32 r_8_4_49_8_16_256_4_4_4n1 [49, 4, 8] [16, 8] takes 1.49 ms, 4426 GFLOPS
*** 38.13 ms : kernel 33 r_16_4_7_7_16_2_2_256_4_4_3_3n2 [49, 4, 16] [2, 2, 16] takes 1.54 ms, 9654 GFLOPS
*** 39.51 ms : kernel 34 r_8_16_49_8_16_64_4_4_4n2 [49, 16, 8] [16, 8] takes 1.38 ms, 4860 GFLOPS
*** 41.00 ms : kernel 35 r_8_4_49_8_16_256_4_4_4n2 [49, 4, 8] [16, 8] takes 1.49 ms, 4426 GFLOPS
*** 42.53 ms : kernel 36 r_16_4_7_7_16_2_2_256_4_4_3_3n3 [49, 4, 16] [2, 2, 16] takes 1.54 ms, 9654 GFLOPS
*** 43.91 ms : kernel 37 r_8_16_49_8_16_64_4_4_4n3 [49, 16, 8] [16, 8] takes 1.38 ms, 4860 GFLOPS
*** 45.40 ms : kernel 38 r_8_4_49_8_16_256_4_4_4n3 [49, 4, 8] [16, 8] takes 1.49 ms, 4426 GFLOPS
*** 46.94 ms : kernel 39 r_16_4_7_7_16_2_2_256_4_4_3_3n4 [49, 4, 16] [2, 2, 16] takes 1.54 ms, 9654 GFLOPS
*** 48.32 ms : kernel 40 r_8_16_49_8_16_64_4_4_4n4 [49, 16, 8] [16, 8] takes 1.38 ms, 4860 GFLOPS
*** 49.81 ms : kernel 41 r_8_4_49_8_16_256_4_4_4n4 [49, 4, 8] [16, 8] takes 1.49 ms, 4426 GFLOPS
*** 51.34 ms : kernel 42 r_16_4_7_7_16_2_2_256_4_4_3_3n5 [49, 4, 16] [2, 2, 16] takes 1.54 ms, 9654 GFLOPS
*** 52.72 ms : kernel 43 r_8_16_49_8_16_64_4_4_4n5 [49, 16, 8] [16, 8] takes 1.38 ms, 4860 GFLOPS
*** 55.75 ms : kernel 44 r_8_8_49_8_16_256_4_4_4 [49, 8, 8] [16, 8] takes 3.03 ms, 4363 GFLOPS
*** 57.27 ms : kernel 45 r_8_8_8_16_512_3_3_7_7_4 [8, 8] [16, 8] takes 1.52 ms, 9721 GFLOPS
*** 61.02 ms : kernel 46 r_2_32_7_7_8_16_256_4_4_4 [49, 32, 2] [16, 8] takes 3.75 ms, 3506 GFLOPS
*** 62.81 ms : kernel 47 r_2_32_49_8_16_128_4_4_4 [49, 32, 2] [16, 8] takes 1.78 ms, 3732 GFLOPS
*** 64.99 ms : kernel 48 r_2_8_49_8_16_512_4_4_4 [49, 8, 2] [16, 8] takes 2.18 ms, 3019 GFLOPS
*** 66.95 ms : kernel 49 r_8_8_8_16_512_3_3_7_7_4n1 [8, 8] [16, 8] takes 1.96 ms, 7570 GFLOPS
*** 68.78 ms : kernel 50 r_2_32_49_8_16_128_4_4_4n1 [49, 32, 2] [16, 8] takes 1.83 ms, 3619 GFLOPS
*** 70.96 ms : kernel 51 r_2_8_49_8_16_512_4_4_4n1 [49, 8, 2] [16, 8] takes 2.18 ms, 3019 GFLOPS
*** 72.92 ms : kernel 52 r_8_8_8_16_512_3_3_7_7_4n2 [8, 8] [16, 8] takes 1.96 ms, 7570 GFLOPS
*** 74.75 ms : kernel 53 r_2_32_49_8_16_128_4_4_4n2 [49, 32, 2] [16, 8] takes 1.83 ms, 3619 GFLOPS
*** 74.97 ms : kernel 54 r_1024_32_49_4 [1024] [32] takes 0.22 ms, 30 GFLOPS
*** 75.15 ms : kernel 55 r_125_16_2_512_4_4_4 [125] [2, 16] takes 0.17 ms, 1503 GFLOPS
*** 75.16 ms : kernel 56 r_2_32_250_4 [2] [32] takes 0.01 ms, 5 GFLOPS
*** 75.20 ms : kernel 57 r_2_32_250_4n1 [2] [32] takes 0.04 ms, 7 GFLOPS
*** 75.20 ms : kernel 58 E_2_125_32_2_4 [125, 2] [2, 32] takes 0.00 ms, 42 GFLOPS
******* total 75.20 ms, 7037 GFLOPS
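Each kernel line lists the launch dimensions, the kernel's own elapsed time, and its achieved throughput; the leading "*** X ms" column is the running total of elapsed time. The throughput figure is plain FLOPs-over-seconds arithmetic, sketched below with the numbers implied by kernel 0:

def achieved_gflops(flop_count: float, elapsed_ms: float) -> float:
    # GFLOPS = floating-point operations / seconds / 1e9
    return flop_count / (elapsed_ms * 1e-3) / 1e9

# kernel 0: ~15.5 GFLOP of work finishing in 2.00 ms gives ~7772 GFLOPS
print(achieved_gflops(15.544e9, 2.00))  # 7772.0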

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,18 @@
using GPU backend
using LLaMA-7B model
Traceback (most recent call last):
File "/home/jebba/devel/tinygrad/tinygrad/examples/llama.py", line 386, in <module>
llama = LLaMa.build(MODEL_PATH, TOKENIZER_PATH, model_gen=args.gen, model_size=args.size, quantize=args.quantize, device=device)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jebba/devel/tinygrad/tinygrad/examples/llama.py", line 155, in build
sp_model = SentencePieceProcessor(model_file=str(tokenizer_path))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jebba/devel/tinygrad/tinygrad/venv/lib/python3.11/site-packages/sentencepiece/__init__.py", line 447, in Init
self.Load(model_file=model_file, model_proto=model_proto)
File "/home/jebba/devel/tinygrad/tinygrad/venv/lib/python3.11/site-packages/sentencepiece/__init__.py", line 905, in Load
return self.LoadFromFile(model_file)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jebba/devel/tinygrad/tinygrad/venv/lib/python3.11/site-packages/sentencepiece/__init__.py", line 310, in LoadFromFile
return _sentencepiece.SentencePieceProcessor_LoadFromFile(self, arg)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
OSError: Not found: "/home/jebba/devel/tinygrad/tinygrad/weights/LLaMA/tokenizer.model": No such file or directory Error #2

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,8 @@
Traceback (most recent call last):
File "/home/jebba/devel/tinygrad/tinygrad/examples/mixtral.py", line 33, in <module>
state = torch_load(args.weights + "/consolidated.00.pth.b")
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/jebba/devel/tinygrad/tinygrad/tinygrad/nn/state.py", line 77, in torch_load
t = Tensor.empty(os.stat(fn).st_size, dtype=dtypes.uint8, device=f"disk:{fn}")
^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: '/home/jebba/devel/tinygrad/tinygrad/weights/mixtral-8x7b-32kseqlen/consolidated.00.pth.b'

File diff suppressed because one or more lines are too long

View File

@@ -8,7 +8,7 @@ msgid ""
msgstr ""
"Project-Id-Version: tinyrocs 0\n"
"Report-Msgid-Bugs-To: \n"
"POT-Creation-Date: 2024-02-06 11:06-0700\n"
"POT-Creation-Date: 2024-02-06 11:14-0700\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
"Language: en\n"
@@ -128,3 +128,51 @@ msgstr ""
#: ../../../_source/output.rst:108
msgid "``python examples/coder.py``"
msgstr ""
#: ../../../_source/output.rst:114
msgid "``python examples/compile_efficientnet.py``"
msgstr ""

#: ../../../_source/output.rst:120
msgid "``python examples/compile_tensorflow.py``"
msgstr ""

#: ../../../_source/output.rst:126
msgid "``python examples/conversation.py``"
msgstr ""

#: ../../../_source/output.rst:132
msgid "``python examples/efficientnet.py``"
msgstr ""

#: ../../../_source/output.rst:138
msgid "``python examples/f16_w_uint32.py``"
msgstr ""

#: ../../../_source/output.rst:144
msgid "``python examples/gpt2.py``"
msgstr ""

#: ../../../_source/output.rst:150
msgid "``python examples/handcode_resnet50_opt.py``"
msgstr ""

#: ../../../_source/output.rst:156
msgid "``python examples/hlb_cifar10.py``"
msgstr ""

#: ../../../_source/output.rst:162
msgid "``python examples/llama.py``"
msgstr ""

#: ../../../_source/output.rst:168
msgid "``python examples/mask_rcnn.py``"
msgstr ""

#: ../../../_source/output.rst:174
msgid "``python examples/mixtral.py``"
msgstr ""

#: ../../../_source/output.rst:180
msgid "``python examples/mnist_gan.py``"
msgstr ""

View File

@@ -110,3 +110,75 @@ Note, while these examples were running, builds were also running, hitting
.. literalinclude:: _static/_output/tinygrad/coder.py.txt
   :language: output

``python examples/compile_efficientnet.py``
---------------------------------------------

.. literalinclude:: _static/_output/tinygrad/compile_efficientnet.py.txt
   :language: output

``python examples/compile_tensorflow.py``
-------------------------------------------

.. literalinclude:: _static/_output/tinygrad/compile_tensorflow.py.txt
   :language: output

``python examples/conversation.py``
-------------------------------------

.. literalinclude:: _static/_output/tinygrad/conversation.py.txt
   :language: output

``python examples/efficientnet.py``
-------------------------------------

.. literalinclude:: _static/_output/tinygrad/efficientnet.py.txt
   :language: output

``python examples/f16_w_uint32.py``
-------------------------------------

.. literalinclude:: _static/_output/tinygrad/f16_w_uint32.py.txt
   :language: output

``python examples/gpt2.py``
-----------------------------

.. literalinclude:: _static/_output/tinygrad/gpt2.py.txt
   :language: output

``python examples/handcode_resnet50_opt.py``
----------------------------------------------

.. literalinclude:: _static/_output/tinygrad/handcode_resnet50_opt.py.txt
   :language: output

``python examples/hlb_cifar10.py``
------------------------------------

.. literalinclude:: _static/_output/tinygrad/hlb_cifar10.py.txt
   :language: output

``python examples/llama.py``
------------------------------

.. literalinclude:: _static/_output/tinygrad/llama.py.txt
   :language: output

``python examples/mask_rcnn.py``
----------------------------------

.. literalinclude:: _static/_output/tinygrad/mask_rcnn.py.txt
   :language: output

``python examples/mixtral.py``
--------------------------------

.. literalinclude:: _static/_output/tinygrad/mixtral.py.txt
   :language: output

``python examples/mnist_gan.py``
----------------------------------

.. literalinclude:: _static/_output/tinygrad/mnist_gan.py.txt
   :language: output