Update environment and torchnlp.py
shwars committed May 25, 2022 · 1 parent bfb93b0 · commit de581e2
Showing 7 changed files with 30 additions and 25 deletions.
.devcontainer/requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ tensorflow-datasets==4.4.0
 tensorflow-hub==0.12.0
 tensorflow-text==2.8.1
 tensorflow==2.8.1
-tensorboard==2.8.1
+tensorboard==2.8.0
 tokenizers==0.10.3
 torchinfo==0.0.8
 tqdm==4.62.3
binder/requirements.txt (2 changes: 1 addition & 1 deletion)
@@ -16,7 +16,7 @@ tensorflow-datasets==4.4.0
 tensorflow-hub==0.12.0
 tensorflow-text==2.8.1
 tensorflow==2.8.1
-tensorboard==2.8.1
+tensorboard==2.8.0
 tokenizers==0.10.3
 torchinfo==0.0.8
 tqdm==4.62.3
etc/how-to-run.md (4 changes: 1 addition & 3 deletions)
@@ -11,9 +11,7 @@ After you install miniconda, you need to clone the repository and create a virtual environment:
 ```bash
 git clone http://github.com/microsoft/ai-for-beginners
 cd ai-for-beginners
-cd .devcontainer
-conda env create --name ai4beg --file environment.yml
-cd ..
+conda env create --name ai4beg --file .devcontainer/environment.yml
 conda activate ai4beg
 ```

lessons/5-NLP/14-Embeddings/torchnlp.py (9 changes: 8 additions & 1 deletion)
@@ -23,9 +23,16 @@ def load_dataset(ngrams=1,min_freq=1):
     vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
     return train_dataset,test_dataset,classes,vocab
 
+stoi_hash = {}
 def encode(x,voc=None,unk=0,tokenizer=tokenizer):
+    global stoi_hash
     v = vocab if voc is None else voc
-    return [v.get_stoi().get(s,unk) for s in tokenizer(x)]
+    if v in stoi_hash.keys():
+        stoi = stoi_hash[v]
+    else:
+        stoi = v.get_stoi()
+        stoi_hash[v]=stoi
+    return [stoi.get(s,unk) for s in tokenizer(x)]
 
 def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
     optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
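
The new `encode` above memoizes the vocabulary's string-to-index mapping: in the torchtext API this repo uses, each call to `get_stoi()` materializes the whole mapping as a Python dict, so fetching it anew for every sample is wasteful, and the commit caches the dict per vocab object instead. Below is a minimal, self-contained sketch of the same pattern; the toy corpus, default arguments, and printed indices are illustrative only, not part of the commit.

```python
# Standalone sketch of the caching pattern introduced above (assumes the same
# torchtext vocab/tokenizer API the repo uses; the toy corpus is made up).
import collections
import torchtext

tokenizer = torchtext.data.utils.get_tokenizer('basic_english')
counter = collections.Counter(tokenizer('the quick brown fox jumps over the lazy dog'))
vocab = torchtext.vocab.vocab(counter, min_freq=1)

stoi_hash = {}  # cache: vocab object -> its string-to-index dict

def encode(x, voc=vocab, unk=0, tokenizer=tokenizer):
    if voc in stoi_hash:           # plain `in` on a dict suffices (no .keys() needed)
        stoi = stoi_hash[voc]
    else:
        stoi = voc.get_stoi()      # built once per vocab...
        stoi_hash[voc] = stoi      # ...and reused on every subsequent call
    return [stoi.get(s, unk) for s in tokenizer(x)]

print(encode('the quick fox'))    # -> [0, 1, 3] with this corpus's insertion order
```

Keying the cache on the vocab object itself means a rebuilt vocabulary simply gets its own entry; the stale dict lingers until its vocab is dropped, a reasonable trade-off for a teaching module that holds one or two vocabularies.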
lessons/5-NLP/16-RNN/RNNPyTorch.ipynb (20 changes: 3 additions & 17 deletions)
@@ -30,21 +30,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loading dataset...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "d:\\WORK\\ai-for-beginners\\5-NLP\\16-RNN\\data\\train.csv: 29.5MB [00:01, 28.3MB/s] \n",
-      "d:\\WORK\\ai-for-beginners\\5-NLP\\16-RNN\\data\\test.csv: 1.86MB [00:00, 9.72MB/s] \n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "Loading dataset...\n",
      "Building vocab...\n"
     ]
    }
@@ -461,10 +447,10 @@
  ],
  "metadata": {
   "interpreter": {
-   "hash": "0cb620c6d4b9f7a635928804c26cf22403d89d98d79684e4529119355ee6d5a5"
+   "hash": "16af2a8bbb083ea23e5e41c7f5787656b2ce26968575d8763f2c4b17f9cd711f"
   },
   "kernelspec": {
-   "display_name": "py37_pytorch",
+   "display_name": "Python 3.8.12 ('py38')",
    "language": "python",
    "name": "python3"
   },
lessons/5-NLP/16-RNN/torchnlp.py (9 changes: 8 additions & 1 deletion)
@@ -23,9 +23,16 @@ def load_dataset(ngrams=1,min_freq=1):
     vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
     return train_dataset,test_dataset,classes,vocab
 
+stoi_hash = {}
 def encode(x,voc=None,unk=0,tokenizer=tokenizer):
+    global stoi_hash
     v = vocab if voc is None else voc
-    return [v.get_stoi().get(s,unk) for s in tokenizer(x)]
+    if v in stoi_hash.keys():
+        stoi = stoi_hash[v]
+    else:
+        stoi = v.get_stoi()
+        stoi_hash[v]=stoi
+    return [stoi.get(s,unk) for s in tokenizer(x)]
 
 def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
     optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
lessons/5-NLP/17-GenerativeNetworks/torchnlp.py (9 changes: 8 additions & 1 deletion)
@@ -23,9 +23,16 @@ def load_dataset(ngrams=1,min_freq=1):
     vocab = torchtext.vocab.vocab(counter, min_freq=min_freq)
     return train_dataset,test_dataset,classes,vocab
 
+stoi_hash = {}
 def encode(x,voc=None,unk=0,tokenizer=tokenizer):
+    global stoi_hash
     v = vocab if voc is None else voc
-    return [v.get_stoi().get(s,unk) for s in tokenizer(x)]
+    if v in stoi_hash.keys():
+        stoi = stoi_hash[v]
+    else:
+        stoi = v.get_stoi()
+        stoi_hash[v]=stoi
+    return [stoi.get(s,unk) for s in tokenizer(x)]
 
 def train_epoch(net,dataloader,lr=0.01,optimizer=None,loss_fn = torch.nn.CrossEntropyLoss(),epoch_size=None, report_freq=200):
     optimizer = optimizer or torch.optim.Adam(net.parameters(),lr=lr)
