This repository has been archived by the owner on Aug 5, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathapp.config
188 lines (159 loc) · 5.37 KB
/
app.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# Do not put spaces around "=".
# This file is read by both the .py code and the shell scripts.
# Search for "?" to find the values to edit for your servers.
[env]
# Hadoop/Java environment variables plus APK sandbox and Keras paths.
# Values use shell-style $VAR references; every $HADOOP_HOME user is listed
# after HADOOP_HOME itself. NOTE(review): presumably the order matters when
# the shell scripts consume this section - confirm before reordering.
HADOOP_USER_NAME=hadoop
HADOOP_HOME=/home/hadoop/hadoop_latest
HADOOP_INSTALL=$HADOOP_HOME
HADOOP_MAPRED_HOME=$HADOOP_HOME
HADOOP_COMMON_HOME=$HADOOP_HOME
HADOOP_HDFS_HOME=$HADOOP_HOME
YARN_HOME=$HADOOP_HOME
HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# NOTE(review): self-reference; presumably keeps the JAVA_HOME already set in
# the calling environment - confirm.
JAVA_HOME=$JAVA_HOME
PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin:$JAVA_HOME/bin
# same path as RETRIEVE_DATA_DIR in [app]
HDFS_RETR_DIR=/user/hadoop/upload/data_retrieved
# APK sandbox directories and feature-extraction script (all under /home/django/nfs)
APK_SANDBOX_IN_DIR=/home/django/nfs/prediction
APK_SANDBOX_OUT_DIR=/home/django/nfs/result
APK_SANDBOX_ARC_DIR=/home/django/nfs/archive
APK_STATIC_SW=/home/django/nfs/software/extract_feature.sh
KERAS_LIB_DIR=/usr/lib/keras-2.1.3
[spark]
# Spark connection and resource settings.
# NOTE(review): the spark_* keys presumably map to the Spark properties of the
# same name with "_" replaced by "." (e.g. spark.executor.memory) - confirm.
#? edit: URL of your Spark master
SPARK_MASTER=spark://master:7077
spark_executor_memory=1g
spark_cores_max=1
# NOTE(review): if this maps to spark.driver.maxResultSize, 0 means unlimited - confirm
spark_driver_maxResultSize=0
spark_rdd_compress=True
# disabled setting, kept for reference
#spark_kryoserializer_buffer_mb=256
[app]
# Application settings: service endpoints, local and HDFS directories,
# script locations, file-type labels and database helpers.
#? edit: HDFS URL of your Hadoop master
HADOOP_MASTER=hdfs://master:9000
#? edit: URL of the model API on your web server
API_MODEL_URL=http://master:8000/atdml/api/model
# Account used by the web app to get the model; the value looks like
# user:password with a placeholder password - replace it before use.
# NOTE(review): the value is quoted; whether the quotes are stripped depends
# on the reader - confirm.
APIWD='webapi7:setP@sSw0Rd_here'
#
SPARK_SUBMIT=/home/hadoop/spark_latest/bin/spark-submit
# refers to SPARK_MASTER, which is defined in [spark]
SPARK_URL=$SPARK_MASTER
WORKING_DIR=atdml/tasks
# local directories; all of the paths below live under MEDIA_ROOT
MEDIA_ROOT=/home/django/myml/media
UPLOAD_FULL_DIR=/home/django/myml/media/upload
FEATURE_SRC_DIR=/home/django/myml/media/upload
TRAIN_DES_DIR=/home/django/myml/media/result
LOG_DIR=/home/django/myml/media/log
TMP_DATA_DIR=/home/django/myml/media/tmpdata
# disabled setting, kept for reference
#PREPROC_DEC_DIR=/home/django/myml/media/tmpdata
RETRIEVE_DEC_DIR=/home/django/myml/media/tmpdata
# directories under /user/hadoop/upload (presumably HDFS paths - confirm)
HDFS_UPLOAD_DIR=/user/hadoop/upload
FEATURE_DES_DIR=/user/hadoop/upload/data_out
RETRIEVE_DATA_DIR=/user/hadoop/upload/data_retrieved
HDFS_MODEL_DIR=/user/hadoop/upload/model
# script paths; relative ones are presumably resolved against WORKING_DIR - confirm
RETRIEVE_SCRIPT=db/query_mongo.py
RETRIEVE_PARQUET_CREATOR=ml/parquet_creator.py
EXTRACT_DS_SCRIPT=ml/extract_dataset.py
# feature-extraction scripts
FEATURE_NG_PATTERN=ml/feature_extraction_ngram.py
FEATURE_CUSTOM=ml/feature_extraction_custom.py
FEATURE_ATD=atd/feature_atd.py
FEATURE_ANDROID_DYNAMIC=android/feature_extraction_dynamic.py
FEATURE_IN_STATIC=in/feature_in_static.py
FEATURE_IN_DYNAMIC=in/feature_in_dynamic.py
# depends on scala version below, Spark use scala 2.10
# NOTE(review): no Scala version setting is visible below this line; the
# comment above may be stale - confirm.
PCA_ML=ml/pca_ml.py
PCA_SKL=ml/pca_skLearn.py
TASK_EXE=/bin/bash
PCA_LOCAL_SCRIPT=/home/django/myml/atdml/tasks/pca_local.sh
# training scripts (MLlib and scikit-learn variants, per the file names)
TRAIN=ml/train.py
TRAIN_ML=ml/train_MLlib.py
TRAIN_ML_CV=ml/train_MLlib_with_cv.py
TRAIN_ML_CLUSTERING=ml/train_MLlib_clustering.py
TRAIN_SKL=ml/train_skLearn.py
TRAIN_SKL_CV=ml/train_skLearn_with_cv.py
TRAIN_SKL_CLUSTERING=ml/train_skLearn_clustering.py
MULTI_RUN=ml/multi_run.py
MULTI_RUN_ML=ml/train_MLlib_multi_run.py
MULTI_RUN_SKL=ml/train_skLearn_multi_run.py
# prediction scripts; the ATD single-file entry is disabled
#PREDICT_SINGLE_FILE_ATD=atd/predict_single_file_atd.py
PREDICT_SINGLE_FILE_IN_DYNAMIC=in/predict_single_file_in_dynamic.py
PREDICT_SINGLE_FILE_IN_STATIC=in/predict_single_file_in_static.py
PREDICT_SINGLE_FILE_ANDROID=android/predict_android_dynamic.py
PREDICT_SINGLE_FILE_GENERIC=ml/predict_single_file_generic.py
PREDICT_SINGLE_FILE_PATTERN=ml/predict_single_file_pattern.py
PREDICT_SINGLE_FILE_CUSTOM=ml/predict_single_file_custom.py
PREDICT_ENSEMBLE_PATTERN=ml/predict_ensemble.py
PREDICT_IMAGE=ml/predict_image.py
PREDICT_CUSTOM_SCRIPT=ml/predict_custom.py
# feature-importance scripts
FEATURE_IMPO_FIRM=ml/feature_importance_FIRM.py
FEATURE_IMPO_2WAYS=ml/feature_importance_2ways.py
FEATURE_IMPO_COMB=ml/feature_importance_combine.py
FEATURE_IMPO_GEN_FIRM=ml/feature_importance_f.py
FEATURE_IMPO_GEN_IP=ml/feature_importance_ip.py
# file-type labels; values containing spaces are double-quoted
# NOTE(review): presumably these must match the labels used by the web app - confirm
FTYPE_IN_S=in-static
FTYPE_IN_S_MD5="MD5 List in-static"
FTYPE_IN_D=in-dynamic
FTYPE_IN_D_MD5="MD5 List in-dynamic"
FTYPE_ATD=ATD
FTYPE_ANDROID_D=Android-dynamic
FTYPE_NGRAM_GZ="N-gram pattern gz"
FTYPE_CUSTOM_GZ="Custom gz"
DNN_PREPROCESS=ml/preprocess_dnn.py
ML_QUEUE_DIR=/home/django/myml/media/ml_queue
# If DEFAULT_SQLITE is left empty, the connection settings in [mysql] should be set instead.
DEFAULT_SQLITE=/home/django/myml/db.sqlite3
# General-purpose script for running SQL queries.
EXEC_SQLITE=db/exec_sqlite.py
# SQL statement that ends with "train_id="; presumably the caller appends the
# id value - confirm.
DEL_PREDICT_SQL="delete from atdml_document where file_type='predict' and train_id="
[mysql]
# MySQL connection settings, all empty here.
# Per the DEFAULT_SQLITE note in [app], fill these in when DEFAULT_SQLITE is empty.
ip_address=
port=
db_name=
username=
password=
[mongo]
# Optional MongoDB data source used for downloading datasets.
# ? Values containing "?" are placeholders to edit for your data source.
# NOTE(review): presumably the "?" must be removed when the real value is
# filled in (e.g. the port below) - confirm.
ip_address=mongo
port=?27017
db_name=?
tb_name=?
username=
password=
# NOTE(review): presumably the number of items written per .gz file - confirm
ITEM_PER_GZFILE=1000
# MongoDB target for output results
# ? edit for your output server
out_ip_address=mongodb
out_port=27017
out_db=myml
out_tb=dataset_info
out_username=
out_password=
[machine_learning]
# General machine-learning settings: data split, output file names and limits.
# NOTE(review): presumably the fraction of the data used for training - confirm
training_portion=0.8
# names for the matrix-related output files
mtx_name_list=mtx_name_list
mtx_libsvm=mtx_libsvm
mtx_stat=mtx_stat
svm_num_gram=2
libsvm_alldata_filename=libsvm_data
# DNN data file name and the suffixes of the data/label/info files
dnn_alldata_filename=dnn_data
dnn_data_suffix=_dnn_data.npy.gz
dnn_label_suffix=_dnn_label.npy.gz
dnn_info_suffix=_dnn_info.npy.gz
random_seed=17
MAX_FEATURES=1442968193
# the same key also appears in [IN] with a different value (100)
feature_count_threshold=2
[ATD]
# Settings specific to the ATD feature type (see FTYPE_ATD / FEATURE_ATD in [app]).
MAX_FEATURES_ATD=120011
param_in_gram_1_ATD=0
num_gram_ATD=2
[IN]
# Settings specific to the "in-static" / "in-dynamic" feature types.
MAX_FEATURES_IN=120011
param_in_gram_1_in=2
# the same key also appears in [machine_learning] with value 2
# NOTE(review): presumably this value takes precedence for IN data - confirm
feature_count_threshold=100
dic_file_in=01_dictionary.txt
omit_file_in=01_omit_num.txt
# Query definitions for dynamic and static data: a JSON field projection, a
# JSON filter and a SQL select over srcTbl for each.
# "?" inside a value marks a placeholder to edit for your data source.
# NOTE(review): the proj/filter values look like MongoDB query documents
# ("$exists"); presumably passed to the [mongo] data source - confirm.
c_dynamic_proj='{"?FileInfo.md5":1,"FileInfo.DateAdded":1,"_id":1,"Classification.ctype":1}'
c_dynamic_filter='{"???":{"$exists":1}}'
c_dynamic_sql="?select FileInfo.md5,FileInfo.DateAdded,FileInfo.filetype,_label_ from srcTbl"
c_static_proj='{"?FileInfo.md5":1,"FileInfo.DateAdded":1,"FileInfo.filetype":1,"METADATA.data.features":1,"_id":1,"Classification.ctype":1}'
c_static_filter='{"?METADATA.data.features":{"$exists":1}}'
c_static_sql="?select FileInfo.md5,FileInfo.DateAdded,FileInfo.filetype,METADATA.data.features,_label_,METADATA.data.fingerprints from srcTbl"