
stable diffusion in readme

pull/375/head
George Hotz 2022-09-05 18:51:56 -07:00
parent c1d5af8b0c
commit 0ba6179de7
3 changed files with 19 additions and 11 deletions

README.md

@@ -160,6 +160,16 @@ PROTIP: Set "GPU=1" environment variable if you want this to go faster.
 PROPROTIP: Set "DEBUG=1" environment variable if you want to see why it's slow.
+### tinygrad supports Stable Diffusion!
+Run `LAZY=0 OPT=2 TORCH=1 python3 examples/stable_diffusion.py`
+<p align="center">
+  <img src="https://raw.githubusercontent.com/geohot/tinygrad/master/docs/stable_diffusion_by_tinygrad.jpg">
+</p>
+"a horse sized cat eating a bagel"
 ### tinygrad supports GANs
 See `examples/mnist_gan.py`

docs/stable_diffusion_by_tinygrad.jpg

Binary file not shown. (After: 87 KiB)

examples/stable_diffusion.py

@@ -361,14 +361,14 @@ class UNetModel:
     saved_inputs = []
     for i,b in enumerate(self.input_blocks):
-      print("input block", i)
+      #print("input block", i)
       for bb in b:
         x = run(x, bb)
       saved_inputs.append(x)
     for bb in self.middle_block:
       x = run(x, bb)
     for i,b in enumerate(self.output_blocks):
-      print("output block", i)
+      #print("output block", i)
       x = x.cat(saved_inputs.pop(), dim=1)
       for bb in b:
         x = run(x, bb)
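For context, this block is the standard U-Net skip-connection pattern: every encoder stage's output is stacked, then popped and concatenated channel-wise on the way back up the decoder. A minimal sketch of that pattern, assuming each block is a callable and tensors support `.cat(other, dim=1)` as tinygrad's do (the block lists here are illustrative, not the file's actual structure):

```python
def unet_forward(x, down_blocks, mid_blocks, up_blocks):
  saved = []
  for b in down_blocks:               # encoder path: remember each stage's output
    x = b(x)
    saved.append(x)
  for b in mid_blocks:                # bottleneck
    x = b(x)
  for b in up_blocks:                 # decoder path: fuse matching encoder features
    x = x.cat(saved.pop(), dim=1)     # concatenate on the channel dimension
    x = b(x)
  return x
```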
@@ -492,9 +492,7 @@ class StableDiffusion:
     self.first_stage_model = AutoencoderKL()
     self.cond_stage_model = namedtuple("CondStageModel", ["transformer"])(transformer = namedtuple("Transformer", ["text_model"])(text_model = CLIPTextTransformer()))
-  #def __call__(self, x, timesteps, context):
-  #  return self.model.diffusion_model(x, timesteps, context)
-  #  return self.first_stage_model(x)
+  # TODO: make __call__ run the model
 # ** ldm.models.autoencoder.AutoencoderKL (done!)
 # 3x512x512 <--> 4x64x64 (16384)
@@ -530,7 +528,7 @@ if __name__ == "__main__":
     except (AttributeError, KeyError, IndexError):
       #traceback.print_exc()
       w = None
-    print(f"{str(v.shape):30s}", w, k)
+    print(f"{str(v.shape):30s}", w.shape if w is not None else w, k)
     if w is not None:
       assert w.shape == v.shape
       w.assign(v.astype(np.float32))
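The loop above is the checkpoint-loading pattern: match each array in the checkpoint to a model tensor by key, check shapes, then copy the values in as float32. A sketch under the assumption that `model_tensors` maps the checkpoint's dotted keys to tinygrad Tensors (the real script resolves keys by walking attributes instead):

```python
import numpy as np

def load_weights(model_tensors, state_dict):
  for k, v in state_dict.items():
    w = model_tensors.get(k)   # None if the model has no matching tensor
    print(f"{str(v.shape):30s}", w.shape if w is not None else w, k)
    if w is not None:
      assert w.shape == v.shape          # fail loudly on any shape mismatch
      w.assign(v.astype(np.float32))     # copy checkpoint values into the model
```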
@@ -560,9 +558,9 @@ if __name__ == "__main__":
     e_t = unconditional_latent + unconditional_guidance_scale * (latent - unconditional_latent)
     return e_t
-  TIMESTEPS = 4
+  TIMESTEPS = 50
   timesteps = list(np.arange(1, 1000, 1000//TIMESTEPS))
-  print(timesteps)
+  print(f"running for {timesteps} timesteps")
   alphas = [model.alphas_cumprod.numpy()[t] for t in timesteps]
   alphas_prev = [1.0] + alphas[:-1]
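The `e_t` line above is the classifier-free guidance combination: the unconditional prediction nudged toward the conditional one by `unconditional_guidance_scale`. As for the schedule, with `TIMESTEPS = 50` the `arange` works out to 50 evenly spaced DDIM steps out of the model's 1000; a quick check in plain numpy:

```python
import numpy as np
# 1000 // 50 == 20, so the schedule is 1, 21, 41, ..., 981: exactly 50 steps.
timesteps = list(np.arange(1, 1000, 1000 // 50))
assert len(timesteps) == 50
assert timesteps[0] == 1 and timesteps[-1] == 981
```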
@@ -571,7 +569,7 @@ if __name__ == "__main__":
     a_t, a_prev = alphas[index], alphas_prev[index]
     sigma_t = 0
     sqrt_one_minus_at = math.sqrt(1-a_t)
-    print(a_t, a_prev, sigma_t, sqrt_one_minus_at)
+    #print(a_t, a_prev, sigma_t, sqrt_one_minus_at)
     pred_x0 = (x - sqrt_one_minus_at * e_t) / math.sqrt(a_t)
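The rest of `get_x_prev_and_pred_x0` falls outside this hunk; for reference, a sketch of the standard deterministic DDIM update (eta = 0, hence `sigma_t = 0`) that the function presumably completes:

```python
import math

def ddim_step(x, e_t, a_t, a_prev, sigma_t=0.0):
  # Predict x_0 from the current noisy latent and the noise estimate e_t.
  pred_x0 = (x - math.sqrt(1 - a_t) * e_t) / math.sqrt(a_t)
  # Direction pointing back toward x_t, then step to the previous timestep.
  dir_xt = math.sqrt(1 - a_prev - sigma_t**2) * e_t
  x_prev = math.sqrt(a_prev) * pred_x0 + dir_xt
  return x_prev, pred_x0
```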
@@ -586,8 +584,8 @@ if __name__ == "__main__":
   latent = Tensor.randn(1,4,64,64)
   # this is diffusion
-  for index, timestep in tqdm(list(enumerate(timesteps))[::-1]):
-    print(index, timestep)
+  for index, timestep in (t:=tqdm(list(enumerate(timesteps))[::-1])):
+    t.set_description("%3d %3d" % (index, timestep))
     e_t = get_model_output(latent, timestep)
     x_prev, pred_x0 = get_x_prev_and_pred_x0(latent, e_t, index)
     #e_t_next = get_model_output(x_prev)
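Sampling walks the schedule backwards, from the noisiest step down to t = 1, denoising as it goes; the walrus assignment just keeps a handle on the tqdm object so the progress bar can display the current step instead of a separate print. The reversed iteration in miniature, with a toy 3-step schedule:

```python
timesteps = [1, 21, 41]                 # toy schedule
for index, timestep in list(enumerate(timesteps))[::-1]:
  print(index, timestep)                # prints: 2 41, then 1 21, then 0 1
```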