singa/src/proto/job.proto at 8b0b36284c99ed09c7fc4b1e49731581c03f3e44 · apache/singa · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
/************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*   http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied.  See the License for the
* specific language governing permissions and limitations
* under the License.
*
*************************************************************/

// Declare the syntax explicitly: this file relies on proto2-only features
// (`required` fields, explicit `default` values and `extensions` ranges).
// Without this line, newer protoc releases warn and implicitly assume proto2.
syntax = "proto2";

package singa;

/*
 * To start a training job, all we need is a JobProto object.
 * It should contain the following fields
 *  - Job Name (name)
 *      the name to identify the job
 *  - NeuralNet (neuralnet)
 *      the neural network structure, which contains a set of layers
 *  - Train One Batch (train_one_batch)
 *      the training algorithm
 *  - Updater (updater)
 *      the protocol for updating parameters at server side
 *  - Cluster Topology (cluster)
 *      the distributed topology of workers/servers
 *  - Training Steps (train_steps)
 *      the number of training iterations
 *  All other fields/functions are optional, e.g., test, checkpoint
 */

// Top-level description of a training job.
message JobProto {
  // Name identifying the job, e.g., "cifar10-dcnn" or "mnist-mlp".
  optional string name = 1;
  // Neural net structure: a set of connected layers.
  optional NetProto neuralnet = 3;
  // Algorithm that computes gradients over one mini-batch.
  optional AlgProto train_one_batch = 5;
  // SGD updater configuration (learning rate, etc.).
  optional UpdaterProto updater = 7;
  // Cluster topology configuration.
  optional ClusterProto cluster = 9;
  // Total number of training steps.
  optional int32 train_steps = 16;
  // How often training info is displayed.
  optional int32 disp_freq = 17 [default = 0];
  // IDs of the GPU devices to use. If fewer IDs are given than there are
  // workers per process, some workers run on GPU and the rest run on CPU.
  repeated int32 gpu = 18;

  // Test frequency, e.g., run a test every 100 training steps.
  optional int32 test_freq = 20 [default = 0];
  // Number of steps needed to go through all test data.
  // TODO(wangwei): set -1 for test forever
  optional int32 test_steps = 21 [default = 0];
  // Validation frequency, e.g., validate every 100 training steps.
  optional int32 validate_freq = 25 [default = 0];
  // Number of steps needed to go through all validation data.
  optional int32 validate_steps = 26 [default = 0];
  // Checkpoint frequency.
  optional int32 checkpoint_freq = 30 [default = 0];

  // Checkpoint files to load for initializing parameters.
  repeated string checkpoint_path = 60;
  // Parameters are sent to servers after training for this many steps.
  optional int32 warmup_steps = 61 [default = 0];
  // Whether to display debug info.
  optional bool debug = 62 [default = false];
  // Whether the version of params loaded from a checkpoint file is reset to
  // step.
  optional bool reset_param_version = 63 [default = true];
  // Number of threads used by openblas.
  optional int32 num_openblas_threads = 64 [default = 1];

  // Checkpointing starts after this many steps.
  optional int32 checkpoint_after = 80 [default = 0];
  // Displaying starts after this many steps.
  optional int32 disp_after = 81 [default = 0];
  // Testing starts after this many steps.
  optional int32 test_after = 82 [default = 0];
  // Validation starts after this many steps.
  optional int32 validate_after = 83 [default = 0];

  // The fields below are for internal use; users typically do not touch them.

  // Resume flag.
  optional bool resume = 90 [default = false];
  // Step of the last snapshot.
  optional int32 step = 91 [default = 0];
  // Job ID allocated by zookeeper.
  optional int32 id = 92 [default = -1];

  // Field numbers set aside for extensions.
  extensions 101 to 200;
}

// Protos used by JobProto
// -----------------------

// Configuration of the algorithm that computes gradients for one mini-batch;
// referenced by JobProto.train_one_batch.
message AlgProto {
  // algorithm calculating gradients for one mini-batch/iteration;
  // defaults to kUserAlg (the user-defined algorithm value of AlgType)
  optional AlgType alg = 1 [default = kUserAlg];
  // user-defined algorithm
  // NOTE(review): presumably the identifier consulted when alg is kUserAlg -
  // confirm against the code that reads this field
  optional string user_alg = 2;
  // for setting CD fields (see CDProto)
  optional CDProto cd_conf = 10;

  // field numbers set aside for extensions
  extensions 101 to 200;
}
// Neural net structure; referenced by JobProto.neuralnet.
message NetProto {
  // the layers of the net
  repeated LayerProto layer = 1;
  // partitioning type for parallelism
  optional int32 partition_dim = 20 [default = 0];
  // Each layer corresponds to a group of unrolled layers, used in RNN models
  repeated LayerGroupProto layer_group = 21;
  // NOTE(review): presumably the number of steps RNN layers are unrolled for
  // (CharRNNProto.unroll_len is documented as having to equal this) - confirm
  optional int32 unroll_len = 22 [default = 1];
}

// A group of layers identified by name; referenced by NetProto.layer_group.
message LayerGroupProto {
  // names of the layers that belong to the same group
  repeated string layer = 1;
}

// Configuration of the parameter updater; referenced by JobProto.updater.
message UpdaterProto {
  // built-in updater type; defaults to kUserUpdater (user-defined updater)
  optional UpdaterType type = 1 [default = kUserUpdater];
  // user-defined updater type
  optional string user_type = 2;

  // configuration for RMSProp algorithm
  optional RMSPropProto rmsprop_conf = 3;
  // configuration for AdaDelta algorithm
  optional AdaDeltaProto adadelta_conf = 4;
  // configuration for Adam algorithm
  optional AdamProto adam_conf = 5;
  // configuration for AdamMax algorithm
  optional AdamMaxProto adammax_conf = 6;

  // learning rate generator
  optional LRGenProto learning_rate = 11;
  // momentum and weight decay; both default to 0
  optional float momentum = 31 [default = 0];
  optional float weight_decay = 32 [default = 0];

  // used to avoid divide by 0, i.e. x/(y+delta)
  optional float delta = 35 [default = 0.00000001];

  // NOTE(review): presumably lower/upper bounds for clipping; both default to
  // 0 - confirm what is clipped and how 0/0 is interpreted
  optional float clip_low = 36 [default = 0];
  optional float clip_high = 37 [default = 0];

  // field numbers set aside for extensions
  extensions 101 to 200;
}

// Cluster topology configuration; referenced by JobProto.cluster.
message ClusterProto {
  // Counts describing the worker/server layout; every count defaults to 1.
  // NOTE(review): meanings below are read off the field names - confirm.
  // number of worker groups / server groups
  optional int32 nworker_groups = 1 [default = 1];
  optional int32 nserver_groups = 2 [default = 1];
  // number of workers / servers per group
  optional int32 nworkers_per_group = 3 [default = 1];
  optional int32 nservers_per_group = 4 [default = 1];
  // number of workers / servers per process
  optional int32 nworkers_per_procs = 5 [default = 1];
  optional int32 nservers_per_procs = 6 [default = 1];
  // local workspace for checkpoint files and vis files
  //required string workspace = 10;
  optional string workspace = 10;

  // servers and workers in different processes?
  optional bool server_worker_separate = 20 [default = false];

  // sync frequency between server groups
  optional int32 sync_freq = 21 [default = 1];

  // port number used by ZeroMQ
  optional int32 start_port = 60 [default = 6723];
  // share memory space between worker groups in one procs
  optional bool share_memory = 62 [default = true];

  // poll time in milliseconds
  optional int32 poll_time = 81 [default = 100];
}

// Settings for the CD algorithm; referenced by AlgProto.cd_conf.
message CDProto {
  // number of steps for gibbs sampling (default 1)
  optional int32 cd_k = 1 [default = 1];
}

// Configuration of a single layer; referenced by NetProto.layer.
message LayerProto {
  // the layer name used for identification
  required string name = 1;
  // source layer names
  repeated string srclayers = 3;
  // parameters, e.g., weight matrix or bias vector
  repeated ParamProto param = 12;
  // all layers are included in the net structure for training phase by default.
  // some layers like data layer for loading test data are not used by training
  // phase should be removed by setting the exclude field.
  repeated Phase exclude = 15;
  // exclude field is deprecated, please use include field instead!!!
  // some layers like data layer for loading test data are not used by training
  // in this case, only test phase should be included by setting the include field.
  repeated Phase include = 14;
  // type of built-in layer; defaults to kUserLayer (user-defined layer)
  optional LayerType type = 20 [default = kUserLayer];
  // type of user layer
  optional string user_type = 21;
  // share data and grad blob with the single src layer, e.g., relu layer can
  // share blobs from conv layer. It is useful for saving memory space.
  optional bool share_src_blobs = 22 [default = false];
  // for unrolling layers in RNN model
  optional int32 unroll_len = 23 [default = 1];
  optional int32 unroll_index = 24 [default = 0];
  // NOTE(review): presumably one connection type (and one shift, see
  // UnrollConnType::kUnrollOneToOne) per source layer - confirm
  repeated UnrollConnType unroll_conn_type = 25;
  repeated int32 shift = 26;

  // overrides the partition dimension for neural net
  optional int32 partition_dim = 60 [default = -1];
  // partition ID of this layer
  // NOTE(review): presumably the index of this partition among
  // num_partitions - confirm
  optional int32 partition_id = 90 [default = 0];
  // num of partitions for this layer
  optional int32 num_partitions = 91 [default = 1];

  // layer specific configuration
  // configuration for input layers, id range [100, 200)
  optional StoreProto store_conf = 100;
  optional DataProto lmdbdata_conf = 190;
  optional MnistProto mnist_conf = 192;
  optional RGBImageProto rgbimage_conf = 193;
  optional DataProto sharddata_conf = 194;
  optional CharRNNProto char_rnn_conf = 195;
  optional OnehotProto onehot_conf = 196;

  // configuration for neuron layers id range [200, 300)
  optional ActivationProto activation_conf = 200;
  optional ConvolutionProto convolution_conf = 201;
  optional DropoutProto dropout_conf = 203;
  optional DummyProto dummy_conf = 204;
  optional InnerProductProto innerproduct_conf = 205;
  optional LRNProto lrn_conf = 206;
  optional PoolingProto pooling_conf = 207;
  optional RBMProto rbm_conf = 209;
  optional ReLUProto relu_conf = 211;
  optional SoftmaxProto softmax_conf = 214;
  optional GRUProto gru_conf = 215;
  optional EmbeddingProto embedding_conf = 216;
  optional BMProto bm_conf = 217;

  // configuration for loss layers, id range [300, 400)
  optional SoftmaxLossProto softmaxloss_conf = 301;

  // configuration for output layers id range [400, 500)
  optional ArgSortProto argsort_conf = 401;

  // configuration for connection layers, id range [501, )
  optional ConcateProto concate_conf = 502;
  optional SliceProto slice_conf = 503;
  optional SplitProto split_conf = 504;
  optional RNNDummyProto rnn_dummy_conf = 505;

  // field numbers set aside for extensions
  extensions 1001 to 1100;
}

// Configuration of one parameter of a layer (see LayerProto.param).
// The weight matrix should be defined before the bias vector.
// TODO(wangwei): separate conf for diff init method
message ParamProto {
  // Identifies the same params across different models; also shown in debug
  // info.
  optional string name = 1 [default = ""];
  // Type of a built-in Param.
  optional ParamType type = 3 [default = kParam];
  // Type of a user-defined Param.
  optional string user_type = 4;

  // Initialization settings.
  optional ParamGenProto init = 5;
  // Multiplier applied to the global learning rate.
  optional float lr_scale = 15 [default = 1];
  // Multiplier applied to the global weight decay.
  optional float wd_scale = 16 [default = 1];

  // Name of the owner param whose values this param shares.
  optional string share_from = 60;

  // Used internally.
  optional int32 id = 90;
  // Used internally.
  optional int32 owner = 91 [default = -1];
  // Partition dimension; -1 means no partition.
  optional int32 partition_dim = 92;
  // Param shape; usually inferred by the program.
  repeated int32 shape = 93;

  // Field numbers set aside for extensions.
  extensions 101 to 200;
}

// ---------------------------
// protos for different layers
// ---------------------------
// learning rate generator proto; referenced by UpdaterProto.learning_rate
message LRGenProto {
  // change method of the learning rate; defaults to kUserChange
  // (user-defined change method)
  optional ChangeMethod type = 1 [default = kUserChange];
  // user-defined change method
  optional string user_type = 2;

  // base learning rate (the base_lr term in the formulas documented on the
  // *_conf messages below); default 0.01
  optional float base_lr = 3 [default = 0.01];

  // per-method settings
  // NOTE(review): presumably only the one matching `type` is read - confirm
  optional FixedStepProto fixedstep_conf = 40;
  optional StepProto step_conf = 41;
  optional LinearProto linear_conf = 42;
  optional ExponentialProto exponential_conf = 43;
  optional InverseProto inverse_conf = 44;
  optional InverseTProto inverset_conf = 45;

  // field numbers set aside for extensions
  extensions 101 to 200;
}

// Parameter initialization settings; referenced by ParamProto.init.
message ParamGenProto {
  // Init method; kUserInit (user-defined init method) unless set.
  optional InitMethod type = 1 [default = kUserInit];
  optional string user_type = 2;
  // Value used by constant init.
  optional float value = 3 [default = 1];
  // Mean and std used by gaussian sampling.
  optional float mean = 4 [default = 0];
  optional float std = 5 [default = 1];
  // Bounds used by uniform sampling.
  optional float low = 8 [default = -1];
  optional float high = 9 [default = 1];

  // Field numbers set aside for extensions.
  extensions 101 to 200;
}

// Activation kinds selectable through ActivationProto.type.
// Note: there is no value 0; numbering starts at 1.
enum ActivationType {
  RELU = 1;
  SIGMOID = 2;
  TANH = 3;
  STANH = 4;
}

// Activation settings; referenced by LayerProto.activation_conf.
message ActivationProto {
  // activation kind; RELU when not set
  optional ActivationType type = 1 [default = RELU];
}

// Settings referenced by LayerProto.onehot_conf.
message OnehotProto {
  // NOTE(review): presumably the vocabulary size, i.e. the length of each
  // one-hot vector - confirm; default 0
  optional int32 vocab_size = 1 [default = 0];
}

// Settings referenced by LayerProto.rgbimage_conf.
message RGBImageProto {
  // scale factor for each pixel
  optional float scale = 1 [default = 1.0];
  // size after cropping
  optional int32 cropsize = 2 [default = 0];
  // mirror the image
  optional bool mirror = 3 [default = false];
  // meanfile path (empty string by default)
  optional string meanfile = 4 [default = ""];
}

// Settings referenced by LayerProto.split_conf.
message SplitProto {
  // number of splits (default 1)
  optional int32 num_splits = 1 [default = 1];
}

// Settings referenced by LayerProto.store_conf (listed there among the
// input-layer configurations).
// NOTE(review): the field meanings are not documented in this file; the notes
// below only restate names and defaults - confirm against the layer code.
message StoreProto {
  optional string backend = 1;
  optional string path = 2;
  // defaults to ","
  optional string separator = 3 [default = ","];
  optional string mean_file = 4;
  optional string std_file = 5;
  optional float mean_value = 6;
  optional float std_value = 7;
  // repeated, unlike the scalar batchsize fields of DataProto/CharRNNProto
  repeated int32 batchsize = 8;
  repeated int32 shape = 9;
  optional bool encoded = 10 [default = false];
  optional int32 random_skip = 11 [default = 0];
  optional bool has_label = 12 [default = true];
  optional bool prefetching = 13 [default = false];
}

// Settings referenced by LayerProto.char_rnn_conf.
message CharRNNProto {
  // NOTE(review): presumably paths of the input text and of the vocabulary
  // file - confirm
  optional string path = 1;
  optional string vocab_path = 2;
  // num of chars to read per instance,  should = NetProto::unroll_len
  optional int32 unroll_len = 3 [default = 50];
  // batch size (default 1)
  optional int32 batchsize = 4 [default = 1];
}

// Settings referenced by LayerProto.embedding_conf.
message EmbeddingProto {
  // NOTE(review): presumably the vocabulary size and the dimension of each
  // embedding vector - confirm; defaults are 0 and 100
  optional int32 vocab_size = 1 [default = 0];
  optional int32 feature_dim = 2 [default = 100];

}

// Settings referenced by LayerProto.bm_conf; declares no fields at present.
message BMProto {
}

// Settings referenced by LayerProto.softmaxloss_conf.
message SoftmaxLossProto {
  // computing accuracy against topk results (default 1)
  optional int32 topk = 1 [default = 1];
  // loss scale factor (default 1)
  optional float scale = 30 [default = 1];
}

// Settings referenced by LayerProto.argsort_conf.
message ArgSortProto {
  // keep labels with topk scores (default 1)
  optional int32 topk = 1 [default = 1];
}

// Settings referenced by LayerProto.concate_conf.
message ConcateProto {
  // NOTE(review): presumably the dimension along which inputs are
  // concatenated and the number of inputs to concatenate - confirm
  optional int32 concate_dim = 1 [default = 0];
  optional int32 num_concates = 2 [default = 1];
}

// Settings referenced by LayerProto.convolution_conf.
message ConvolutionProto {
  // The number of outputs for the layer
  optional int32 num_filters = 1;
  // the kernel height/width
  optional int32 kernel = 2 [default = 3];
  // The padding height/width
  optional int32 pad = 30 [default = 0];
  // the stride
  optional int32 stride = 31 [default = 1];

  // Per-axis variants of kernel/pad/stride, with the same defaults.
  // NOTE(review): how these interact with the square settings above
  // (which one wins when both are set) is not visible here - confirm
  optional int32 kernel_x = 41 [default = 3];
  optional int32 kernel_y = 42 [default = 3];

  optional int32 pad_x = 44 [default = 0];
  optional int32 pad_y = 45 [default = 0];

  optional int32 stride_x = 47 [default = 1];
  optional int32 stride_y = 48 [default = 1];

  // cudnn workspace size in MB
  // NOTE(review): the field name says "byte" while this comment says MB -
  // confirm the unit with the code that reads it
  optional int32 workspace_byte_limit = 50 [default = 512];
}

// Settings referenced by LayerProto.lmdbdata_conf and
// LayerProto.sharddata_conf.
message DataProto {
  // path to the data file/folder, absolute or relative to the workspace
  required string path = 2;
  // batch size.
  required int32 batchsize = 4;
  // skip [0,random_skip] records
  optional int32 random_skip = 30 [default = 0];
}

// Settings referenced by LayerProto.mnist_conf.
message MnistProto {
  // norm_a and norm_b are declared optional so that their declared defaults
  // take effect when they are omitted. As `required` fields every config had
  // to spell them out, which made the defaults pointless.

  // normalization x/norm_a
  optional float norm_a = 1 [default = 1];
  // normalization x-norm_b
  optional float norm_b = 2 [default = 0];

  // elastic distortion
  optional int32 kernel = 30 [default = 0];
  optional float sigma = 31 [default = 0];
  optional float alpha = 32 [default = 0];
  // rotation or horizontal shearing
  optional float beta = 33 [default = 0];
  // scaling
  optional float gamma = 34 [default = 0];
  // scale to this size as input for deformation
  optional int32 resize = 35 [default = 0];
  optional int32 elastic_freq = 36 [default = 0];
}

// Settings referenced by LayerProto.dummy_conf.
message DummyProto {
  // NOTE(review): presumably mark the dummy layer as an input and/or output
  // layer - confirm; both default to false
  optional bool input = 1 [default = false];
  optional bool output = 2 [default = false];
  // shape of data and grad blobs
  repeated int32 shape = 3;
}

// Settings referenced by LayerProto.rnn_dummy_conf.
message RNNDummyProto {
  // NOTE(review): presumably the name of a source layer resolved at run
  // time - confirm
  optional string dynamic_srclayer = 1;
  // if shape set, random generate the data blob
  repeated int32 shape = 2;
  // if integer is true, generate integer data
  optional bool integer = 3 [default = false];
  // range of the random generation (both bounds default to 0)
  optional float low = 4 [default = 0];
  optional float high = 5 [default = 0];
}

// Message that stores parameters used by DropoutLayer; referenced by
// LayerProto.dropout_conf
message DropoutProto {
  // dropout ratio (default 0.5)
  optional float dropout_ratio = 30 [default = 0.5];
}

// Settings referenced by LayerProto.rbm_conf. hdim is required and has no
// default.
message RBMProto {
  required int32 hdim = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional bool gaussian = 3 [default = false]; // use gaussian sampling or not
}

// Message that stores parameters used by GRULayer; referenced by
// LayerProto.gru_conf
message GRUProto {
  // dimension of hidden state for the layer (required, no default)
  required int32 dim_hidden = 1;
  // use bias vector or not (default true)
  optional bool bias_term = 2 [default = true];
}


// Message that stores parameters used by InnerProductLayer; referenced by
// LayerProto.innerproduct_conf
message InnerProductProto {
  // number of outputs for the layer (required, no default)
  required int32 num_output = 1;
  // use bias vector or not (default true)
  optional bool bias_term = 30 [default = true];
  // transpose or not (default false)
  // NOTE(review): what is transposed is not stated here - presumably the
  // weight matrix; confirm
  optional bool transpose = 31 [default = false];
}

// Settings referenced by LayerProto.lrn_conf.
message LRNProto {
  // local response size
  // Declared optional so that the declared default (5) takes effect when the
  // field is omitted; as a `required` field it always had to be set.
  optional int32 local_size = 1 [default = 5];
  // scale factor
  optional float alpha = 31 [default = 1.0];
  // exponential number
  optional float beta = 32 [default = 0.75];
  // offset
  optional float knorm = 34 [default = 1.0];
}
// Pooling methods selectable through PoolingProto.pool.
enum PoolMethod {
  MAX = 0;
  AVG = 1;
}

// Settings referenced by LayerProto.pooling_conf.
message PoolingProto {
  // Size of the (square) kernel.
  optional int32 kernel = 1 [default = 3];
  // Pooling method; MAX unless set.
  optional PoolMethod pool = 30 [default = MAX];
  // Padding size.
  optional uint32 pad = 31 [default = 0];
  // Stride.
  optional uint32 stride = 32 [default = 2];

  // Per-axis variants of kernel, pad and stride.
  optional int32 kernel_x = 41 [default = 3];
  optional int32 kernel_y = 42 [default = 3];

  optional int32 pad_x = 44 [default = 0];
  optional int32 pad_y = 45 [default = 0];

  optional int32 stride_x = 47 [default = 2];
  optional int32 stride_y = 48 [default = 2];
}

// Settings referenced by LayerProto.relu_conf.
message ReLUProto {
  // Ref. Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013).
  // Rectifier nonlinearities improve neural network acoustic models.
  // In ICML Workshop on Deep Learning for Audio, Speech, and Language Processing.
  // Defaults to 0.
  optional float negative_slope = 1 [default = 0];
}

// Settings referenced by LayerProto.slice_conf.
message SliceProto {
  // NOTE(review): presumably the dimension along which the input is sliced
  // and the number of slices produced - confirm
  optional int32 slice_dim = 1 [default = 0];
  optional int32 num_slices = 2 [default = 1];
}

// Settings referenced by LayerProto.softmax_conf.
message SoftmaxProto {
  // Can be used to do softmax over each channel of one image by setting it to
  // be the size of the second dimension (the first dimension is batchsize).
  // Defaults to 1.
  optional int32 num_softmax_per_instance = 1 [default = 1];
}

// Settings for the RMSProp updater; referenced by UpdaterProto.rmsprop_conf.
message RMSPropProto {
 // history=history*rho_+(1-rho_)*(grad*grad_scale);
 // rho is required and has no default
  required float rho = 1;
}
// Settings for the AdaDelta updater; referenced by
// UpdaterProto.adadelta_conf.
message AdaDeltaProto {
  // Declared optional so that the declared default (0.9) takes effect when the
  // field is omitted; as a `required` field it always had to be set.
  optional float rho = 1 [default = 0.9];
}
// Settings for the Adam updater; referenced by UpdaterProto.adam_conf.
message AdamProto {
  // Both fields are declared optional so that the declared defaults take
  // effect when a field is omitted; as `required` fields they always had to
  // be set.
  optional float beta1 = 1 [default = 0.9];
  optional float beta2 = 2 [default = 0.999];
}
// Settings for the AdamMax updater; referenced by UpdaterProto.adammax_conf.
message AdamMaxProto {
  // Both fields are declared optional so that the declared defaults take
  // effect when a field is omitted; as `required` fields they always had to
  // be set.
  optional float beta1 = 1 [default = 0.9];
  optional float beta2 = 2 [default = 0.999];
}

// Settings referenced by LRGenProto.fixedstep_conf. `step` and `step_lr` are
// parallel lists: entry i of one corresponds to entry i of the other.
message FixedStepProto {
  // step thresholds
  repeated int32 step = 28;
  // lr = step_lr[i] if current step >= step[i]
  repeated float step_lr = 29;
}

// Settings referenced by LRGenProto.step_conf.
message StepProto {
  // lr = base_lr * gamma^(step/change_freq)
  // Declared optional so that the declared default (1) takes effect when the
  // field is omitted; as a `required` field it always had to be set.
  optional float gamma = 35 [default = 1];
  // lr = base_lr * gamma^(step/change_freq)
  // Stays required: it has no default.
  required int32 change_freq = 40;
}

// Settings referenced by LRGenProto.linear_conf. Both fields feed the formula
//   lr = (1 - step / freq) * base_lr + (step / freq) * final_lr
message LinearProto {
  // The `freq` term of the formula.
  required int32 change_freq = 40;
  // The `final_lr` term of the formula.
  required float final_lr = 39;
}

// Settings referenced by LRGenProto.exponential_conf.
message ExponentialProto {
  // lr = base / 2^(step/change_freq)
  // required, no default
  required int32 change_freq = 40;
}

// Settings referenced by LRGenProto.inverset_conf.
message InverseTProto {
  // lr = base_lr / (1+step/final_lr)
  // required, no default
  required float final_lr = 39;
}
// Settings referenced by LRGenProto.inverse_conf. Both fields feed the formula
//   lr = base_lr*(1+gamma*step)^(-pow)
// They are declared optional so that the declared defaults take effect when a
// field is omitted; as `required` fields they always had to be set.
message InverseProto {
  // the `gamma` term of the formula
  optional float gamma = 1 [default = 1];
  // the `pow` term of the formula
  optional float pow = 2 [default = 0];
}
// Bounds with defaults -1 and 1. No other message visible in this file
// references this type; ParamGenProto carries low/high fields with the same
// defaults for uniform sampling.
message UniformProto {
  optional float low = 1 [default = -1];
  optional float high = 2 [default = 1];
}
// Mean and std with defaults 0 and 1. No other message visible in this file
// references this type; ParamGenProto carries mean/std fields with the same
// defaults for gaussian sampling.
message GaussianProto {
  optional float mean = 1 [default = 0];
  optional float std = 2 [default = 1];
}

// --------------
// All Enum Types
// --------------

// Algorithms selectable through AlgProto.alg (which defaults to kUserAlg).
enum AlgType {
  // Back-propagation algorithm for feed-forward models, e.g., CNN and RNN
  kBP = 1;
  // Contrastive Divergence algorithm for RBM, DBM, etc.
  kCD = 2;
  // BPTT for training RNN models
  kBPTT = 3;
  // For user defined algorithm.
  kUserAlg = 104;
}

// Built-in layer types selectable through LayerProto.type (which defaults to
// kUserLayer). Values are grouped by hundreds: 1xx input, 2xx neuron,
// 3xx loss, 4xx output, 5xx connection, 600 user-defined.
enum LayerType {
  /*
   * Input layers
   *  - Load records from file, database
   */
  kCSVInput = 100;
  kImagePreprocess = 101;
  kRecordInput = 103;
  kLMDBData = 190;  // deprecated
  kLabel = 191;  // deprecated
  kMnist = 192;  // deprecated
  kRGBImage = 193;  // deprecated
  kShardData = 194;  // deprecated
  kCharRNN = 195;
  kRNNLabel = 196;
  kOneHot = 197;

  /*
   * Neuron layers
   *  - Feature transformation
   */
  kConvolution = 201;
  kCConvolution = 202;
  kDropout = 203;
  kDummy = 204;
  kInnerProduct = 205;
  kLRN = 206;
  kPooling = 207;
  kCPooling = 208;
  kRBMHid = 209;
  kRBMVis = 210;
  kReLU = 211;
  kSTanh = 212;
  kSigmoid = 213;
  kSoftmax = 214;
  kGRU = 215;
  kEmbedding = 216;
  kActivation = 217;
  kBM = 218;

  // NOTE(review): presumably cuDNN-backed counterparts of the neuron layers
  // above - confirm
  kCudnnConv = 250;
  kCudnnPool = 251;
  kCudnnLRN = 252;
  kCudnnSoftmax = 253;
  kCudnnActivation = 254;
  kCudnnBM = 255;

  /*
   * Loss layers
   *  - Compute objective loss
   */
  kEuclideanLoss = 300;
  kSoftmaxLoss = 301;
  // cudnn v3
  kCudnnSoftmaxLoss = 350;

  /*
   * Output layers
   *  - Write results to file, database
   */
  kAccuracy = 400;
  kArgSort = 401;
  kCSVOutput = 402;
  kRecordOutput = 403;
  kCharRNNOutput = 404;

  /*
   * Connection layers
   *  - Connect layers when neural net is partitioned
   */
  kBridgeDst = 500;
  kBridgeSrc = 501;
  kConcate = 502;
  kSlice = 503;
  kSplit = 504;
  kRNNDummy = 505;

  /*
   * User defined layer
   *  - users should configure user_type
   */
  kUserLayer = 600;
}

// Built-in updater types selectable through UpdaterProto.type (which defaults
// to kUserUpdater).
enum UpdaterType {
  // normal SGD with momentum and weight decay
  kSGD = 1;
  // adaptive subgradient, http://www.magicbroom.info/Papers/DuchiHaSi10.pdf
  kAdaGrad = 2;
  // http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
  kRMSProp = 3;
  // Nesterov first optimal gradient method
  kNesterov = 4;
  // AdaDelta
  kAdaDelta = 5;
  // Adam
  kAdam = 6;
  // AdamMax
  kAdamMax = 7;
  // For user defined updater
  kUserUpdater = 105;
}

// Phases; used by LayerProto.include and LayerProto.exclude. Apart from
// kUnknown (0), every value is a distinct power of two.
enum Phase {
  kUnknown = 0;
  kTrain = 1;
  kVal = 2;
  kTest = 4;
  // Positive phase of the contrastive divergence algorithm.
  kPositive = 8;
  // Negative phase of the contrastive divergence algorithm.
  kNegative = 16;
  kForward = 32;
  kBackward = 64;
  kLoss = 128;
  kDeploy = 256;

  // Used for aggregating parameter gradients when a Param is shared.
  kAggGrad = 512;
}

// Param kinds selectable through ParamProto.type (which defaults to kParam).
enum ParamType {
  // built-in Param
  kParam = 0;
  // user-defined Param
  kUser = 103;
}

// Learning-rate change methods selectable through LRGenProto.type (which
// defaults to kUserChange).
// NOTE(review): presumably each non-fixed method reads the like-named
// *_conf field of LRGenProto - confirm
enum ChangeMethod {
  kFixed = 0;
  kInverseT = 1;
  kInverse = 2;
  kExponential = 3;
  kLinear = 4;
  kStep = 5;
  kFixedStep = 6;
  // For user defined change method
  kUserChange = 100;
}

// Parameter init methods selectable through ParamGenProto.type (which
// defaults to kUserInit). Note that value 3 is not used.
enum InitMethod {
  // fix the values of all parameters to a constant given in the value field
  kConstant = 0;
  // sample gaussian with std and mean
  kGaussian = 1;
  // uniform sampling between low and high
  kUniform = 2;
  // from Toronto Convnet, let a=1/sqrt(fan_in), w*=a after generating from
  // Gaussian distribution
  kGaussianSqrtFanIn = 4;
  // from Toronto Convnet, rectified linear activation, let
  // a=sqrt(3)/sqrt(fan_in), range is [-a, +a]; no need to set value=sqrt(3),
  // the program will multiply it.
  kUniformSqrtFanIn = 5;
  // from Theano MLP tutorial, let a=sqrt(6/(fan_in+fan_out)). for tanh
  // activation, range is [-a, +a], for sigmoid activation, range is
  // [-4a, +4a], put the scale factor to value field.
  // <a href="http://deeplearning.net/tutorial/mlp.html"> Theano MLP</a>
  kUniformSqrtFanInOut = 6;

  // For user defined init method
  kUserInit = 101;
}

// How an unrolled layer connects to the unrolled copies of a source layer;
// used by LayerProto.unroll_conn_type.
enum UnrollConnType {
  // i-th unrolled layer <- (i - shift)-th src unrolled layer
  kUnrollOneToOne = 1;
  // i-th unrolled layer <- all src unrolled layers
  kUnrollOneToAll = 2;
  // i-th unrolled layer <- last unrolled src layer
  // NOTE(review): the name suggests only the first unrolled layer is
  // connected (to the last src unrolled layer) - confirm which is intended
  kUnrollFirstToLast = 3;
  // customized connection type defined by src_conn
  // NOTE(review): no field named src_conn is visible in this file - confirm
  kUnrollCustomized = 4;
}