Spaces:

CSquid333
/

RASP-Synthesis

Sleeping

App Files Community

NeelNanda committed on Jan 14, 2023

Commit

c46567d

1 Parent(s): 4d24b96

Made compatible with Python 3.8

Browse files

Files changed (16) hide show

tracr/compiler/assemble.py +12 -11
tracr/compiler/basis_inference.py +3 -2
tracr/compiler/compiling.py +2 -1
tracr/compiler/craft_graph_to_model.py +5 -5
tracr/compiler/lib.py +2 -2
tracr/compiler/nodes.py +2 -2
tracr/compiler/rasp_to_graph.py +3 -2
tracr/compiler/rasp_to_transformer_integration_test.py +1 -1
tracr/craft/bases.py +1 -1
tracr/craft/chamber/categorical_attn.py +2 -1
tracr/craft/chamber/numerical_mlp.py +2 -2
tracr/craft/transformers.py +3 -3
tracr/craft/vectorspace_fns.py +5 -5
tracr/rasp/rasp.py +10 -8
tracr/transformer/encoder.py +8 -8
tracr/transformer/model.py +4 -4

tracr/compiler/assemble.py CHANGED Viewed

@@ -15,7 +15,8 @@
 """Assemble weights of a transformer model from a craft residual stack."""
 import dataclasses
-from typing import Any, Callable, Optional, Protocol
 import chex
 import einops
@@ -32,11 +33,11 @@ from tracr.transformer import model
 @chex.dataclass
 class AssembledTransformerModelOutput:
-  decoded: list[Any]  # length T.
   unembedded: jax.Array  # [B, T]     B = 1 always.
-  layer_outputs: list[jax.Array]  # [B, T, D]
-  residuals: list[jax.Array]  # [B, T, D]
-  attn_logits: list[jax.Array]  # [B, T, T, H]
   transformer_output: jax.Array  # [B, T, D]
   input_embeddings: jax.Array
@@ -58,11 +59,11 @@ class AssembledTransformerModel:
   get_compiled_model: Callable[[], model.CompiledTransformerModel]
   params: hk.Params
   model_config: model.TransformerConfig
-  residual_labels: list[str]
   input_encoder: Optional[encoder.Encoder] = None
   output_encoder: Optional[encoder.Encoder] = None
-  def apply(self, tokens: list[bases.Value]) -> AssembledTransformerModelOutput:
     """Returns output from running the model on a set of input tokens."""
     if self.input_encoder:
       tokens = self.input_encoder.encode(tokens)
@@ -97,12 +98,12 @@ class EmbeddingModules:
 def _get_model_config_and_module_names(
     craft_model: transformers.SeriesWithResiduals
-) -> tuple[model.TransformerConfig, list[str]]:
   """Returns model config and locations (in params) for halflayers."""
-  multi_attn_heads: list[list[transformers.AttentionHead]] = []
-  mlps: list[transformers.MLP] = []
-  module_names: list[str] = []
   candidate_module_names = []
   for layer in range(len(craft_model.blocks)):

 """Assemble weights of a transformer model from a craft residual stack."""
 import dataclasses
+from typing import Any, Callable, Optional, List, Tuple
+from typing_extensions import Protocol
 import chex
 import einops
 @chex.dataclass
 class AssembledTransformerModelOutput:
+  decoded: List[Any]  # length T.
   unembedded: jax.Array  # [B, T]     B = 1 always.
+  layer_outputs: List[jax.Array]  # [B, T, D]
+  residuals: List[jax.Array]  # [B, T, D]
+  attn_logits: List[jax.Array]  # [B, T, T, H]
   transformer_output: jax.Array  # [B, T, D]
   input_embeddings: jax.Array
   get_compiled_model: Callable[[], model.CompiledTransformerModel]
   params: hk.Params
   model_config: model.TransformerConfig
+  residual_labels: List[str]
   input_encoder: Optional[encoder.Encoder] = None
   output_encoder: Optional[encoder.Encoder] = None
+  def apply(self, tokens: List[bases.Value]) -> AssembledTransformerModelOutput:
     """Returns output from running the model on a set of input tokens."""
     if self.input_encoder:
       tokens = self.input_encoder.encode(tokens)
 def _get_model_config_and_module_names(
     craft_model: transformers.SeriesWithResiduals
+) -> Tuple[model.TransformerConfig, List[str]]:
   """Returns model config and locations (in params) for halflayers."""
+  multi_attn_heads: List[List[transformers.AttentionHead]] = []
+  mlps: List[transformers.MLP] = []
+  module_names: List[str] = []
   candidate_module_names = []
   for layer in range(len(craft_model.blocks)):

tracr/compiler/basis_inference.py CHANGED Viewed

@@ -16,6 +16,7 @@
 import dataclasses
 import itertools
 import networkx as nx
 from tracr.compiler import nodes
@@ -34,12 +35,12 @@ class InferBasesOutput:
 def infer_bases(
     graph: nx.DiGraph,
     sink: Node,
-    vocab: set[rasp.Value],
     max_seq_len: int,
 ) -> None:
   """Infers in-place the possible output values and vector bases of the SOps."""
-  def compute_value_set(sop: rasp.SOp) -> set[rasp.Value]:
     """Computes value set using already-computed predecessor value sets."""
     if sop is rasp.tokens:
       return vocab

 import dataclasses
 import itertools
+from typing import Set
 import networkx as nx
 from tracr.compiler import nodes
 def infer_bases(
     graph: nx.DiGraph,
     sink: Node,
+    vocab: Set[rasp.Value],
     max_seq_len: int,
 ) -> None:
   """Infers in-place the possible output values and vector bases of the SOps."""
+  def compute_value_set(sop: rasp.SOp) -> Set[rasp.Value]:
     """Computes value set using already-computed predecessor value sets."""
     if sop is rasp.tokens:
       return vocab

tracr/compiler/compiling.py CHANGED Viewed

@@ -13,6 +13,7 @@
 # limitations under the License.
 # ==============================================================================
 """Combines all steps of compiling a RASP program."""
 from tracr.compiler import assemble
 from tracr.compiler import basis_inference
@@ -29,7 +30,7 @@ COMPILER_PAD = "compiler_pad"
 def compile_rasp_to_model(
     program: rasp.SOp,
-    vocab: set[rasp.Value],
     max_seq_len: int,
     causal: bool = False,
     compiler_bos: str = COMPILER_BOS,

 # limitations under the License.
 # ==============================================================================
 """Combines all steps of compiling a RASP program."""
+from typing import Set
 from tracr.compiler import assemble
 from tracr.compiler import basis_inference
 def compile_rasp_to_model(
     program: rasp.SOp,
+    vocab: Set[rasp.Value],
     max_seq_len: int,
     causal: bool = False,
     compiler_bos: str = COMPILER_BOS,

tracr/compiler/craft_graph_to_model.py CHANGED Viewed

@@ -15,7 +15,7 @@
 """Create a craft model from a computational graph."""
 import collections
-from typing import Sequence
 import networkx as nx
 from tracr.compiler import nodes
@@ -105,7 +105,7 @@ def _all_mlp_nodes(node_list: Sequence[Node]) -> bool:
 def _allocate_modules_to_layers(graph: nx.DiGraph,
-                                sources: Sequence[Node]) -> dict[int, int]:
   """Allocate all nodes in compute graph to layers.
   First, computes the longest path from the input to each node that is a model
@@ -128,9 +128,9 @@ def _allocate_modules_to_layers(graph: nx.DiGraph,
     A dict mapping from node ids to layer indices, where 0, 1, 2, 3, ...
     are in the order attention, mlp, attention, mlp, ...
   """
-  layer_allocation: dict[int, int] = collections.defaultdict(lambda: -1)
-  depth_by_node_id: dict[int, int] = dict()
-  nodes_by_depth: dict[int, list[Node]] = collections.defaultdict(list)
   # Compute depth of all model components (longest path from source to node)
   for node_id, node in graph.nodes.items():

 """Create a craft model from a computational graph."""
 import collections
+from typing import Sequence, List, Dict
 import networkx as nx
 from tracr.compiler import nodes
 def _allocate_modules_to_layers(graph: nx.DiGraph,
+                                sources: Sequence[Node]) -> Dict[int, int]:
   """Allocate all nodes in compute graph to layers.
   First, computes the longest path from the input to each node that is a model
     A dict mapping from node ids to layer indices, where 0, 1, 2, 3, ...
     are in the order attention, mlp, attention, mlp, ...
   """
+  layer_allocation: Dict[int, int] = collections.defaultdict(lambda: -1)
+  depth_by_node_id: Dict[int, int] = dict()
+  nodes_by_depth: Dict[int, List[Node]] = collections.defaultdict(list)
   # Compute depth of all model components (longest path from source to node)
   for node_id, node in graph.nodes.items():

tracr/compiler/lib.py CHANGED Viewed

@@ -14,7 +14,7 @@
 # ==============================================================================
 """RASP programs only using the subset of RASP supported by the compiler."""
-from typing import Sequence
 from tracr.rasp import rasp
@@ -95,7 +95,7 @@ def make_pair_balance(sop: rasp.SOp, open_token: str,
   return pair_balance.named("pair_balance")
-def make_shuffle_dyck(pairs: list[str]) -> rasp.SOp:
   """Returns 1 if a set of parentheses are balanced, 0 else.
    (As implemented in the RASP paper.)

 # ==============================================================================
 """RASP programs only using the subset of RASP supported by the compiler."""
+from typing import Sequence, List
 from tracr.rasp import rasp
   return pair_balance.named("pair_balance")
+def make_shuffle_dyck(pairs: List[str]) -> rasp.SOp:
   """Returns 1 if a set of parentheses are balanced, 0 else.
    (As implemented in the RASP paper.)

tracr/compiler/nodes.py CHANGED Viewed

@@ -14,9 +14,9 @@
 # ==============================================================================
 """Documents the data stored in nodes after each compiler pass."""
-from typing import Any
-Node = dict[str, Any]
 NodeID = str
 # RASP -> Graph

 # ==============================================================================
 """Documents the data stored in nodes after each compiler pass."""
+from typing import Any, Dict
+Node = Dict[str, Any]
 NodeID = str
 # RASP -> Graph

tracr/compiler/rasp_to_graph.py CHANGED Viewed

@@ -16,6 +16,7 @@
 import dataclasses
 import queue
 import networkx as nx
 from tracr.compiler import nodes
@@ -29,14 +30,14 @@ NodeID = nodes.NodeID
 class ExtractRaspGraphOutput:
   graph: nx.DiGraph
   sink: Node  # the program's output.
-  sources: list[Node]  # the primitive S-Ops.
 def extract_rasp_graph(tip: rasp.SOp) -> ExtractRaspGraphOutput:
   """Converts a RASP program into a graph representation."""
   expr_queue = queue.Queue()
   graph = nx.DiGraph()
-  sources: list[NodeID] = []
   def ensure_node(expr: rasp.RASPExpr) -> NodeID:
     """Finds or creates a graph node corresponding to expr; returns its ID."""

 import dataclasses
 import queue
+from typing import List
 import networkx as nx
 from tracr.compiler import nodes
 class ExtractRaspGraphOutput:
   graph: nx.DiGraph
   sink: Node  # the program's output.
+  sources: List[Node]  # the primitive S-Ops.
 def extract_rasp_graph(tip: rasp.SOp) -> ExtractRaspGraphOutput:
   """Converts a RASP program into a graph representation."""
   expr_queue = queue.Queue()
   graph = nx.DiGraph()
+  sources: List[NodeID] = []
   def ensure_node(expr: rasp.RASPExpr) -> NodeID:
     """Finds or creates a graph node corresponding to expr; returns its ID."""

tracr/compiler/rasp_to_transformer_integration_test.py CHANGED Viewed

@@ -38,7 +38,7 @@ class CompilerIntegrationTest(tests_common.VectorFnTestCase):
     for actual, expected in zip(actual_seq, expected_seq):
       if expected is not None and actual != expected:
         self.fail(f"{actual_seq} does not match (ignoring Nones) "
-                  f"{expected_seq=}")
   @parameterized.named_parameters(
       dict(

     for actual, expected in zip(actual_seq, expected_seq):
       if expected is not None and actual != expected:
         self.fail(f"{actual_seq} does not match (ignoring Nones) "
+                  f"expected_seq={expected_seq}")
   @parameterized.named_parameters(
       dict(

tracr/craft/bases.py CHANGED Viewed

@@ -243,5 +243,5 @@ def ensure_dims(
 ) -> None:
   """Raises ValueError if vs has the wrong number of dimensions."""
   if vs.num_dims != num_dims:
-    raise ValueError(f"{name} must have {num_dims=}, "
                      f"but got {vs.num_dims}: {vs.basis}")

 ) -> None:
   """Raises ValueError if vs has the wrong number of dimensions."""
   if vs.num_dims != num_dims:
+    raise ValueError(f"{name} must have num_dims={num_dims}, "
                      f"but got {vs.num_dims}: {vs.basis}")

tracr/craft/chamber/categorical_attn.py CHANGED Viewed

@@ -14,7 +14,8 @@
 # ==============================================================================
 """Attention head for categorical inputs."""
-from typing import Optional, Protocol
 from tracr.craft import bases
 from tracr.craft import transformers

 # ==============================================================================
 """Attention head for categorical inputs."""
+from typing import Optional
+from typing_extensions import Protocol
 from tracr.craft import bases
 from tracr.craft import transformers

tracr/craft/chamber/numerical_mlp.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import dataclasses
-from typing import Callable, Iterable
 from tracr.craft import bases
 from tracr.craft import transformers
@@ -35,7 +35,7 @@ class DiscretisingLayerMaterials:
   """
   action: Callable[[bases.BasisDirection], bases.VectorInBasis]
   hidden_space: bases.VectorSpaceWithBasis
-  output_values: list[float]
 def _get_discretising_layer(input_value_set: Iterable[float],

 import dataclasses
+from typing import Callable, Iterable, List
 from tracr.craft import bases
 from tracr.craft import transformers
   """
   action: Callable[[bases.BasisDirection], bases.VectorInBasis]
   hidden_space: bases.VectorSpaceWithBasis
+  output_values: List[float]
 def _get_discretising_layer(input_value_set: Iterable[float],

tracr/craft/transformers.py CHANGED Viewed

@@ -16,7 +16,7 @@
 import abc
 import dataclasses
-from typing import Iterable, Optional, Sequence, Union
 import numpy as np
@@ -111,7 +111,7 @@ class AttentionHead(Block):
 @dataclasses.dataclass
 class MultiAttentionHead(Block):
   """Applies attention heads in parallel."""
-  sub_blocks: list[Union[AttentionHead, "MultiAttentionHead"]]
   def __post_init__(self):
     spaces = [block.residual_space for block in self.sub_blocks]
@@ -182,7 +182,7 @@ HalfLayerBlock = Union[MLP, AttentionHead, MultiAttentionHead]
 @dataclasses.dataclass
 class SeriesWithResiduals(Block):
   """A series of blocks with residual connections."""
-  blocks: list[HalfLayerBlock]
   def __post_init__(self):
     spaces = [block.residual_space for block in self.blocks]

 import abc
 import dataclasses
+from typing import Iterable, Optional, Sequence, Union, List
 import numpy as np
 @dataclasses.dataclass
 class MultiAttentionHead(Block):
   """Applies attention heads in parallel."""
+  sub_blocks: List[Union[AttentionHead, "MultiAttentionHead"]]
   def __post_init__(self):
     spaces = [block.residual_space for block in self.sub_blocks]
 @dataclasses.dataclass
 class SeriesWithResiduals(Block):
   """A series of blocks with residual connections."""
+  blocks: List[HalfLayerBlock]
   def __post_init__(self):
     spaces = [block.residual_space for block in self.blocks]

tracr/craft/vectorspace_fns.py CHANGED Viewed

@@ -65,7 +65,7 @@ class Linear(VectorFunction):
   def __call__(self, x: VectorInBasis) -> VectorInBasis:
     if x not in self.input_space:
-      raise TypeError(f"{x=} not in {self.input_space=}.")
     return VectorInBasis(
         basis_directions=sorted(self.output_space.basis),
         magnitudes=x.magnitudes @ self.matrix,
@@ -84,8 +84,8 @@ class Linear(VectorFunction):
     for i, direction in enumerate(input_space.basis):
       out_vector = action(direction)
       if out_vector not in output_space:
-        raise TypeError(f"image of {direction} from {input_space=} "
-                        f"is not in {output_space=}")
       matrix[i, :] = out_vector.magnitudes
     return Linear(input_space, output_space, matrix)
@@ -140,9 +140,9 @@ class ScalarBilinear:
   def __call__(self, x: VectorInBasis, y: VectorInBasis) -> float:
     """Describes the action of the operator on vectors."""
     if x not in self.left_space:
-      raise TypeError(f"{x=} not in {self.left_space=}.")
     if y not in self.right_space:
-      raise TypeError(f"{y=} not in {self.right_space=}.")
     return (x.magnitudes.T @ self.matrix @ y.magnitudes).item()
   @classmethod

   def __call__(self, x: VectorInBasis) -> VectorInBasis:
     if x not in self.input_space:
+      raise TypeError(f"x={x} not in self.input_space={self.input_space}.")
     return VectorInBasis(
         basis_directions=sorted(self.output_space.basis),
         magnitudes=x.magnitudes @ self.matrix,
     for i, direction in enumerate(input_space.basis):
       out_vector = action(direction)
       if out_vector not in output_space:
+        raise TypeError(f"image of {direction} from input_space={input_space} "
+                        f"is not in output_space={output_space}")
       matrix[i, :] = out_vector.magnitudes
     return Linear(input_space, output_space, matrix)
   def __call__(self, x: VectorInBasis, y: VectorInBasis) -> float:
     """Describes the action of the operator on vectors."""
     if x not in self.left_space:
+      raise TypeError(f"x={x} not in self.left_space={self.left_space}.")
     if y not in self.right_space:
+      raise TypeError(f"y={y} not in self.right_space={self.right_space}.")
     return (x.magnitudes.T @ self.matrix @ y.magnitudes).item()
   @classmethod

tracr/rasp/rasp.py CHANGED Viewed

@@ -16,7 +16,7 @@
 Every object in the RASP language is a function.
-The most important type is S-Op, which is a function list[Value] -> list[Value].
 An S-Op represents a state inside the residual stream of the transformer.
 Therefore, any RASP program that represents a transformer computation must
@@ -26,11 +26,12 @@ end of the computation. In particular, given an S-Op `x`,
 at location `x` when the transformer is fed [1, 2, 3] as input.
 A secondary (but still important) type is Selector, which is a function
-list[Value] -> list[list[bool]]. Given a Selector `sel`, sel([1, 2, 3])
 represents something like an attention matrix in the transformer.
 For a full reference on RASP, see https://arxiv.org/abs/2106.06981.
 """
 import abc
 import collections.abc
@@ -38,13 +39,14 @@ import copy
 import enum
 import functools
 import itertools
-from typing import (Any, Callable, Generic, Mapping, Optional, Protocol,
                     Sequence, TypeVar, Union)
 from absl import logging
 import numpy as np
-SelectorValue = list[list[bool]]
 NumericValue = Union[int, float]
 Value = Union[None, int, float, str, bool]
 VT = TypeVar("VT", bound=Value)
@@ -63,7 +65,7 @@ _ENCODING_KEY = "encoding"
 # that key is accessed.
 #
 # See the `default_name` annotator for a full example.
-DEFAULT_ANNOTATORS: dict[str, "Annotator"] = {}
 class Annotator(Protocol):
@@ -81,7 +83,7 @@ class _Annotations(collections.abc.Mapping):
   def __init__(self, expr, **kwargs: Any):
     self._expr = expr
-    self._inner_dict: dict[str, Any] = {**kwargs}
   def __getitem__(self, key: str) -> Any:
     if key not in self._inner_dict:
@@ -758,7 +760,7 @@ _default_name_by_class = {
 }
-def default_name(expr: RASPExpr) -> dict[str, str]:
   for cls, name in _default_name_by_class.items():
     if isinstance(expr, cls):
       return name
@@ -905,7 +907,7 @@ class DefaultRASPEvaluator(abc.ABC):
 def _get_selected(
-    selector_row: list[bool],
     values: Sequence[VT],
 ) -> Sequence[VT]:
   """Helper for aggregate. [T T F], [a b c] -> [a b]."""

 Every object in the RASP language is a function.
+The most important type is S-Op, which is a function List[Value] -> List[Value].
 An S-Op represents a state inside the residual stream of the transformer.
 Therefore, any RASP program that represents a transformer computation must
 at location `x` when the transformer is fed [1, 2, 3] as input.
 A secondary (but still important) type is Selector, which is a function
+List[Value] -> List[List[bool]]. Given a Selector `sel`, sel([1, 2, 3])
 represents something like an attention matrix in the transformer.
 For a full reference on RASP, see https://arxiv.org/abs/2106.06981.
 """
+import pdb
 import abc
 import collections.abc
 import enum
 import functools
 import itertools
+from typing import (Any, Callable, Generic, Mapping, Optional, List, Dict,
                     Sequence, TypeVar, Union)
+from typing_extensions import Protocol
 from absl import logging
 import numpy as np
+SelectorValue = List[List[bool]]
 NumericValue = Union[int, float]
 Value = Union[None, int, float, str, bool]
 VT = TypeVar("VT", bound=Value)
 # that key is accessed.
 #
 # See the `default_name` annotator for a full example.
+DEFAULT_ANNOTATORS: Dict[str, "Annotator"] = {}
 class Annotator(Protocol):
   def __init__(self, expr, **kwargs: Any):
     self._expr = expr
+    self._inner_dict: Dict[str, Any] = {**kwargs}
   def __getitem__(self, key: str) -> Any:
     if key not in self._inner_dict:
 }
+def default_name(expr: RASPExpr) -> Dict[str, str]:
   for cls, name in _default_name_by_class.items():
     if isinstance(expr, cls):
       return name
 def _get_selected(
+    selector_row: List[bool],
     values: Sequence[VT],
 ) -> Sequence[VT]:
   """Helper for aggregate. [T T F], [a b c] -> [a b]."""

tracr/transformer/encoder.py CHANGED Viewed

@@ -15,7 +15,7 @@
 """Basic encoder for inputs with a fixed vocabulary."""
 import abc
-from typing import Any, Sequence, Optional
 from tracr.craft import bases
@@ -28,11 +28,11 @@ class Encoder(abc.ABC):
   """
   @abc.abstractmethod
-  def encode(self, inputs: list[Any]) -> list[Any]:
     return list()
   @abc.abstractmethod
-  def decode(self, encodings: list[Any]) -> list[Any]:
     return list()
   @property
@@ -55,10 +55,10 @@ class Encoder(abc.ABC):
 class NumericalEncoder(Encoder):
   """Encodes numerical variables (simply using the identity mapping)."""
-  def encode(self, inputs: list[float]) -> list[float]:
     return inputs
-  def decode(self, encodings: list[float]) -> list[float]:
     return encodings
@@ -93,7 +93,7 @@ class CategoricalEncoder(Encoder):
     self._pad_token = pad_token
     self._max_seq_len = max_seq_len
-  def encode(self, inputs: list[bases.Value]) -> list[int]:
     if self.enforce_bos and inputs[0] != self.bos_token:
       raise ValueError("First input token must be BOS token. "
                        f"Should be '{self.bos_token}', but was '{inputs[0]}'.")
@@ -101,12 +101,12 @@ class CategoricalEncoder(Encoder):
       raise ValueError(f"Inputs {missing} not found in encoding ",
                        self.encoding_map.keys())
     if self._max_seq_len is not None and len(inputs) > self._max_seq_len:
-      raise ValueError(f"{inputs=} are longer than the maximum "
                        f"sequence length {self._max_seq_len}")
     return [self.encoding_map[x] for x in inputs]
-  def decode(self, encodings: list[int]) -> list[bases.Value]:
     """Recover the tokens that corresponds to `ids`. Inverse of __call__."""
     decoding_map = {val: key for key, val in self.encoding_map.items()}
     if missing := set(encodings) - set(decoding_map.keys()):

 """Basic encoder for inputs with a fixed vocabulary."""
 import abc
+from typing import Any, Sequence, Optional, List
 from tracr.craft import bases
   """
   @abc.abstractmethod
+  def encode(self, inputs: List[Any]) -> List[Any]:
     return list()
   @abc.abstractmethod
+  def decode(self, encodings: List[Any]) -> List[Any]:
     return list()
   @property
 class NumericalEncoder(Encoder):
   """Encodes numerical variables (simply using the identity mapping)."""
+  def encode(self, inputs: List[float]) -> List[float]:
     return inputs
+  def decode(self, encodings: List[float]) -> List[float]:
     return encodings
     self._pad_token = pad_token
     self._max_seq_len = max_seq_len
+  def encode(self, inputs: List[bases.Value]) -> List[int]:
     if self.enforce_bos and inputs[0] != self.bos_token:
       raise ValueError("First input token must be BOS token. "
                        f"Should be '{self.bos_token}', but was '{inputs[0]}'.")
       raise ValueError(f"Inputs {missing} not found in encoding ",
                        self.encoding_map.keys())
     if self._max_seq_len is not None and len(inputs) > self._max_seq_len:
+      raise ValueError(f"inputs={inputs} are longer than the maximum "
                        f"sequence length {self._max_seq_len}")
     return [self.encoding_map[x] for x in inputs]
+  def decode(self, encodings: List[int]) -> List[bases.Value]:
     """Recover the tokens that corresponds to `ids`. Inverse of __call__."""
     decoding_map = {val: key for key, val in self.encoding_map.items()}
     if missing := set(encodings) - set(decoding_map.keys()):

tracr/transformer/model.py CHANGED Viewed

@@ -26,7 +26,7 @@ Forked from: haiku.examples.transformer.model
 import collections
 import dataclasses
-from typing import Callable, Optional
 import chex
 import haiku as hk
@@ -44,9 +44,9 @@ CallableHaikuModule = Callable[..., jax.Array]
 @chex.dataclass
 class TransformerOutput:
-  layer_outputs: list[jax.Array]  # [B, T, D]
-  residuals: list[jax.Array]  # [B, T, D]
-  attn_logits: list[jax.Array]  # [B, H, T, T]
   output: jax.Array  # [B, T, D]
   input_embeddings: jax.Array  # [B, T, D]

 import collections
 import dataclasses
+from typing import Callable, Optional, List
 import chex
 import haiku as hk
 @chex.dataclass
 class TransformerOutput:
+  layer_outputs: List[jax.Array]  # [B, T, D]
+  residuals: List[jax.Array]  # [B, T, D]
+  attn_logits: List[jax.Array]  # [B, H, T, T]
   output: jax.Array  # [B, T, D]
   input_embeddings: jax.Array  # [B, T, D]