Commit ab8de91d authored by Your Name's avatar Your Name

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

parents 4064f254 08ab56d4
......@@ -76,6 +76,17 @@ def con_sql(db,sql):
return df
def get_pre_number():
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select count(*) from esmm_pre_data"
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchone()[0]
print("预测集数量:")
print(result)
db.close()
def feature_engineer():
apps_number, app_list_map, level2_number, leve2_map, level3_number, leve3_map = get_map()
unique_values = []
......@@ -221,9 +232,11 @@ def feature_engineer():
print("train tfrecord done")
print((h - f) / 60)
print("样本总量:")
print("训练集样本总量:")
print(rdd.count())
get_pre_number()
test = rdd.filter(lambda x: x[0] == validate_date).map(
lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
x[10], x[11], x[12], x[13]))
......
......@@ -158,18 +158,19 @@ def get_hdfs(dir_in):
return a
def get_pre():
def get_pre_number():
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select count(*) from esmm_pre_data"
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchone()[0]
print("预测集数量:")
print(result)
db.close()
if __name__ == '__main__':
get_pre()
# get_pre()
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
# .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
# .set("spark.tispark.plan.allow_index_double_read", "false") \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment