Change training strategy to adaptive learning rate

2019-12-16 02:30:05 -05:00 · 2019-12-16 02:30:05 -05:00 · 8fa6b46ca8
parent 3f2c6d18a3
commit 8fa6b46ca8
2 changed files with 39 additions and 41 deletions
--- a/plot.py
+++ b/plot.py
@ -6,7 +6,7 @@ import matplotlib
 from mpl_toolkits.axisartist.axislines import SubplotZero

 # Number learned symbols
-order = 4
+order = 16

 # File in which the trained model is saved
 input_file = 'output/constellation-order-{}.pth'.format(order)
@ -17,8 +17,8 @@ color_map = matplotlib.cm.Dark2
 # Restore model from file
 model = constellation.ConstellationNet(
    order=order,
-    encoder_layers_sizes=(8,),
-    decoder_layers_sizes=(8,),
+    encoder_layers_sizes=(8, 4),
+    decoder_layers_sizes=(4, 8),
    channel_model=constellation.GaussianChannel()
 )

@ -34,7 +34,7 @@ constellation = model.get_constellation()
 util.plot_constellation(
    ax, constellation,
    model.channel, model.decoder,
-    grid_step=0.001, noise_samples=2500
+    grid_step=0.001, noise_samples=0
 )

 pyplot.show()
--- a/train.py
+++ b/train.py
@ -7,16 +7,13 @@ from mpl_toolkits.axisartist.axislines import SubplotZero
 torch.manual_seed(42)

 # Number of symbols to learn
-order = 4
+order = 16

 # Number of batches to skip between every loss report
-loss_report_batch_skip = 500
+loss_report_batch_skip = 50

-# Size of batches during coarse optimization (small batches)
-coarse_batch_size = 8
-
-# Size of batches during fine optimization (large batches)
-fine_batch_size = 2048
+# Size of batches
+batch_size = 32

 # File in which the trained model is saved
 output_file = 'output/constellation-order-{}.pth'.format(order)
@ -33,15 +30,15 @@ pyplot.show(block=False)
 # Train the model with random data
 model = constellation.ConstellationNet(
    order=order,
-    encoder_layers_sizes=(8,),
-    decoder_layers_sizes=(8,),
+    encoder_layers_sizes=(8, 4),
+    decoder_layers_sizes=(4, 8),
    channel_model=constellation.GaussianChannel()
 )

 print('Starting training\n')

-criterion = torch.nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
+# Current batch index
+batch = 0

 # Accumulated loss for last batches
 running_loss = 0
@ -50,12 +47,6 @@ running_loss = 0
 # the second phase where point positions are refined using large batches
 is_coarse_optim = True

-# Current batch index
-batch = 1
-
-# Current batch size
-batch_size = coarse_batch_size
-
 # List of training examples (not shuffled)
 classes_ordered = torch.arange(order).repeat(batch_size)

@ -63,7 +54,18 @@ classes_ordered = torch.arange(order).repeat(batch_size)
 prev_constellation = model.get_constellation()
 total_change = float('inf')

-while True:
+# Optimizer settings
+criterion = torch.nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=0.1)
+scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
+    optimizer, verbose=True,
+    factor=0.25,
+    patience=100,
+    cooldown=50,
+    threshold=1e-8
+)
+
+while total_change >= 1e-4:
    # Shuffle training data and convert to one-hot encoding
    classes_dataset = classes_ordered[torch.randperm(len(classes_ordered))]
    onehot_dataset = util.messages_to_onehot(classes_dataset, order)
@ -76,38 +78,34 @@ while True:
    loss.backward()
    optimizer.step()

+    # Update learning rate scheduler
+    scheduler.step(loss)
+
    # Check for convergence
    model.eval()
    constellation = model.get_constellation()
    total_change = (constellation - prev_constellation).abs().sum()
    prev_constellation = constellation

-    if is_coarse_optim:
-        if total_change < 1e-5:
-            print('Changing to fine optimization')
-            is_coarse_optim = False
-            batch_size = fine_batch_size
-            classes_ordered = torch.arange(order).repeat(batch_size)
-    elif total_change < 1e-5:
-        break
-
-    # Report loss and update figure (if applicable)
+    # Report loss
    running_loss += loss.item()

    if batch % loss_report_batch_skip == loss_report_batch_skip - 1:
-        print('Batch #{} (size {})'.format(batch + 1, batch_size))
+        print('Batch #{}'.format(batch + 1))
        print('\tLoss is {}'.format(running_loss / loss_report_batch_skip))
        print('\tChange is {}\n'.format(total_change))

+        running_loss = 0
+
+    # Update figure with current encoding
    ax.clear()
    util.plot_constellation(
        ax, constellation,
-            model.channel, model.decoder
+        model.channel, model.decoder,
+        noise_samples=0
    )
    fig.canvas.draw()
-        pyplot.pause(1e-17)
-
-        running_loss = 0
+    fig.canvas.flush_events()

    batch += 1