diff --git a/atomgpt/ff.py b/atomgpt/ff.py
deleted file mode 100644
index afc8d95..0000000
--- a/atomgpt/ff.py
+++ /dev/null
@@ -1,304 +0,0 @@
-import torch
-import transformers
-from torch.optim import AdamW
-from torch.utils.data import Dataset, DataLoader
-from transformers import (
-    GPT2Tokenizer,
-    get_linear_schedule_with_warmup,
-)
-import numpy as np
-import time
-import os
-
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-
-class AtomGPTFF(torch.nn.Module):
-    def __init__(
-        self,
-        pretrained_model_name="gpt2",
-        latent_dim=512,
-        n_out=3,
-        tokenizer="",
-        include_stress=True,
-        stress_weight=0.1,
-        force_weight=1,
-    ):
-        super(AtomGPTFF, self).__init__()
-        self.config = transformers.GPT2Config.from_pretrained(
-            pretrained_model_name
-        )
-        self.gpt2 = transformers.GPT2Model.from_pretrained(
-            pretrained_model_name, config=self.config
-        )
-        self.tokenizer = tokenizer
-        # Per-atom force head: predicts n_out (=3) components per atom
-        self.regressor_forces = torch.nn.Sequential(
-            torch.nn.Linear(self.config.n_embd, latent_dim),
-            torch.nn.ReLU(),
-            torch.nn.Linear(latent_dim, n_out),
-        )
-        # Per-atom energy head: contributions are summed in forward()
-        self.regressor_energies = torch.nn.Sequential(
-            torch.nn.Linear(self.config.n_embd, latent_dim),
-            torch.nn.ReLU(),
-            torch.nn.Linear(latent_dim, 1),
-        )
-        self.include_stress = include_stress
-        self.force_weight = force_weight
-        self.stress_weight = stress_weight
-
-    def forward(self, sample):
-        if isinstance(sample, dict):
-            sample = sample["text"]
-        # Prompt format: "<volume line>@\n<element x y z, one atom per line>&"
-        texts = sample.split("@\n")[1].strip("&").split("\n")
-
-        forces = []
-        energies = []
-        coords = []
-        for text in texts:
-            coord = np.array(text.strip("&").split()[1:], dtype=float)
-            coords.append(coord)
-            input_ids = self.tokenizer(text, return_tensors="pt").input_ids.to(
-                device
-            )
-            outputs = self.gpt2(input_ids=input_ids)
-            last_hidden_state = outputs.last_hidden_state
-            force = self.regressor_forces(last_hidden_state[:, -1, :])
-            forces.append(force)
-            energy = self.regressor_energies(last_hidden_state[:, -1, :])
-            energies.append(energy)
-
-        # Stack per-atom force predictions into an (n_atoms, 3) tensor
-        forces = torch.cat(forces).to(device)
-
-        coords = torch.tensor(np.array(coords), dtype=torch.float).to(device)
-        net_energy = torch.sum(torch.cat(energies))
-        stress_tensor = torch.zeros(1, device=device)
-        if self.include_stress and self.stress_weight > 0:
-            vol = float(sample.split("The volume is ")[1].split(".")[0])
-            # Virial-style estimate: sum_i r_i (outer) F_i over atoms / volume
-            stress_tensor = torch.flatten(
-                torch.sum(torch.einsum("ij,ik->ijk", coords, forces), dim=0)
-                / vol
-            ).squeeze(0)
-        info = {}
-        info["forces"] = forces
-        info["coords"] = coords
-        info["energy"] = net_energy
-        info["stress"] = stress_tensor
-        return info
-
-
-class AtomGPTFFDataset(Dataset):
-    def __init__(self, samples):
-        self.samples = samples
-
-    def __len__(self):
-        return len(self.samples)
-
-    def __getitem__(self, idx):
-        sample = self.samples[idx]
-        return sample
-
-
-def collate_fn(batch):
-    max_len = max(len(sample["forces"]) for sample in batch)
-    padded_batch = []
-    for sample in batch:
-        padded_sample = sample.copy()
-        # Build a new list so the original sample is not mutated across epochs
-        padded_sample["forces"] = sample["forces"] + [[0, 0, 0]] * (
-            max_len - len(sample["forces"])
-        )  # Pad the targets
-        padded_batch.append(padded_sample)
-    return padded_batch
-
-
-def train(
-    tokenizer=None,
-    latent_dim=512,
-    train_array=[],
-    val_array=[],
-    test_array=[],
-    include_stress=True,
-    force_weight=1,
-    stress_weight=0.1,
-    batch_size=2,
-    num_epochs=10,
-    pretrained_model_name="gpt2",
-):
-    model = AtomGPTFF(
-        tokenizer=tokenizer,
-        include_stress=include_stress,
-        latent_dim=latent_dim,
-        pretrained_model_name=pretrained_model_name,
-        force_weight=force_weight,
-        stress_weight=stress_weight,
-    )
-
-    train_dataset = AtomGPTFFDataset(train_array)
-    print("Instance train", train_dataset[0])
-    train_dataloader = DataLoader(
-        train_dataset,
-        batch_size=batch_size,
-        shuffle=True,
-        collate_fn=collate_fn,
-    )
-    val_dataset = AtomGPTFFDataset(val_array)
-    print("Instance val", val_dataset[0])
-    val_dataloader = DataLoader(
-        val_dataset,
-        batch_size=batch_size,
-        shuffle=True,
-        collate_fn=collate_fn,
-    )
-
-    if test_array:
-        test_dataset = AtomGPTFFDataset(test_array)
-        test_dataloader = DataLoader(
-            test_dataset,
-            batch_size=batch_size,
-            shuffle=True,
-            collate_fn=collate_fn,
-        )
-
-    model.to(device)
-
-    optimizer = AdamW(model.parameters(), lr=5e-5)
-    total_steps = len(train_dataloader) * num_epochs
-    scheduler = get_linear_schedule_with_warmup(
-        optimizer, num_warmup_steps=0, num_training_steps=total_steps
-    )
-    criterion = torch.nn.L1Loss()
-    best_loss = np.inf
-    for epoch in range(num_epochs):
-        t1 = time.time()
-        model.train()
-        train_loss = 0
-        for batch_idx, batch in enumerate(train_dataloader):
-            batch_loss = 0
-            optimizer.zero_grad()
-            for sample in batch:
-                pred = model(sample["text"])
-                # Trim the collate padding back off the force targets
-                target_forces = torch.tensor(sample["forces"])[
-                    0 : pred["forces"].shape[0]
-                ].to(device)
-                target_stress = torch.tensor(
-                    sample["stress"], dtype=torch.float
-                ).to(device)
-                energy_loss = criterion(
-                    pred["energy"],
-                    torch.tensor(sample["energy"], dtype=torch.float).to(
-                        device
-                    ),
-                )
-                force_loss = force_weight * criterion(
-                    pred["forces"], target_forces
-                )
-                if include_stress:
-                    stress_loss = stress_weight * criterion(
-                        pred["stress"], target_stress
-                    )
-                    loss = energy_loss + force_loss + stress_loss
-                else:
-                    loss = energy_loss + force_loss
-                loss.backward()
-                batch_loss += loss.item()
-            train_loss += batch_loss
-            optimizer.step()
-            scheduler.step()
-        train_loss = train_loss / len(train_dataloader)
-        model.eval()
-        val_loss = 0
-        with torch.no_grad():  # no gradients needed for validation
-            for batch_idx, batch in enumerate(val_dataloader):
-                batch_loss = 0
-                for sample in batch:
-                    pred = model(sample["text"])
-                    target_forces = torch.tensor(sample["forces"])[
-                        0 : pred["forces"].shape[0]
-                    ].to(device)
-                    target_stress = torch.tensor(
-                        sample["stress"], dtype=torch.float
-                    ).to(device)
-                    energy_loss = torch.mean(
-                        (
-                            pred["energy"]
-                            - torch.tensor(
-                                sample["energy"], dtype=torch.float
-                            ).to(device)
-                        )
-                        ** 2
-                    )
-                    force_loss = force_weight * torch.mean(
-                        (pred["forces"] - target_forces) ** 2
-                    )
-                    if include_stress:
-                        stress_loss = stress_weight * torch.mean(
-                            (pred["stress"] - target_stress) ** 2
-                        )
-                        loss = energy_loss + force_loss + stress_loss
-                    else:
-                        loss = energy_loss + force_loss
-                    batch_loss += loss.item()
-                val_loss += batch_loss
-        val_loss = val_loss / len(val_dataloader)
-        output_dir = "./"
-        if val_loss < best_loss:
-            best_loss = val_loss
-            best_model_name = "best_model.pt"
-            torch.save(
-                model.state_dict(),
-                os.path.join(output_dir, best_model_name),
-            )
-        t2 = time.time()
-        epoch_time = t2 - t1
-        print(
-            f"Epoch {epoch + 1}, Train Loss, Val Loss, Time:"
-            f" {train_loss:.4f}, {val_loss:.4f}, {epoch_time:.4f}"
-        )
-
-
-if __name__ == "__main__":
-
-    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
-    tokenizer.pad_token = tokenizer.eos_token
-    latent_dim = 512
-
-    samples = [
-        {
-            "text": "The volume is 60.@\nGa 0 0 0&",
-            "stress": [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            "forces": [[1.2, 1, 1]],
-            "energy": 1,
-        },
-        {
-            "text": "The volume is 60.@\nGa 1 1 1 \nAs 2 2 2&",
-            "stress": [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            "forces": [[1.2, 1, 1], [1.2, 1, 1]],
-            "energy": 1,
-        },
-        {
-            "text": "The volume is 60.@\nGa 1 1 1 \nAs 2 2 2\nAl 3 3 3&",
-            "stress": [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            "forces": [[1.2, 1, 1], [1.2, 1, 1], [1.2, 1, 1]],
-            "energy": 2,
-        },
-        {
-            "text": "The volume is 60.@\nGa 1 1 1 \nAs 2 2 2 \nAl 3 3 3 \nXe 4 4 4&",
-            "stress": [1, 1, 1, 1, 1, 1, 1, 1, 1],
-            "forces": [[1.2, 1, 1], [1.2, 1, 1], [1.2, 1, 1], [1.2, 1, 1]],
-            "energy": 3,
-        },
-    ]
-    train(
-        tokenizer=tokenizer,
-        latent_dim=latent_dim,
-        train_array=samples,
-        val_array=samples,
-        test_array=samples,
-        include_stress=True,
-        batch_size=2,
-        num_epochs=10,
-        pretrained_model_name="gpt2",
-    )
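
The stress estimate in the deleted forward() is a virial-style expression, stress ~ (1/V) sum_i r_i (outer) F_i, flattened to the nine components the demo samples supply as "stress". A minimal standalone sketch of what that einsum computes; the tensor values here are illustrative, not model output:

    import torch

    # Made-up per-atom coordinates and forces, shape (n_atoms, 3) each
    coords = torch.tensor([[1.0, 1.0, 1.0], [2.0, 2.0, 2.0]])
    forces = torch.tensor([[0.1, 0.0, 0.0], [0.0, 0.2, 0.0]])
    vol = 60.0

    # "ij,ik->ijk" builds one 3x3 outer product r_i (outer) F_i per atom i
    outer = torch.einsum("ij,ik->ijk", coords, forces)  # (n_atoms, 3, 3)
    stress = torch.flatten(outer.sum(dim=0) / vol)      # (9,)

    # Same result via an explicit per-atom loop
    manual = sum(torch.outer(r, f) for r, f in zip(coords, forces)) / vol
    assert torch.allclose(stress, manual.flatten())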
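forward() recovers the coordinates and the cell volume from the prompt string itself rather than from separate inputs. A small sketch of that same parsing, using one of the demo strings above; note that the volume parse keeps only the digits before the first ".", so fractional volumes would be truncated:

    import numpy as np

    text = "The volume is 60.@\nGa 1 1 1 \nAs 2 2 2&"
    # "@\n" separates the volume line from the per-atom lines; "&" terminates
    atom_lines = text.split("@\n")[1].strip("&").split("\n")
    # atom_lines -> ["Ga 1 1 1 ", "As 2 2 2"]
    coords = np.array(
        [line.strip("&").split()[1:] for line in atom_lines], dtype=float
    )
    vol = float(text.split("The volume is ")[1].split(".")[0])  # -> 60.0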
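For completeness, a minimal inference sketch, assuming the training run above has written best_model.pt to the working directory and that AtomGPTFF, device, and the tokenizer setup from this module are in scope; the checkpoint name and sample string are taken from the demo above:

    import torch

    # Assumes AtomGPTFF, device, and tokenizer are defined as in ff.py
    model = AtomGPTFF(tokenizer=tokenizer, latent_dim=512)
    model.load_state_dict(torch.load("best_model.pt", map_location=device))
    model.to(device)
    model.eval()

    with torch.no_grad():  # no gradients needed at inference time
        pred = model("The volume is 60.@\nGa 0 0 0&")
    print(pred["energy"], pred["forces"], pred["stress"])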