Commit d94b9aa5 authored by 张彦钊's avatar 张彦钊

change test file

parent ce8cfc08
...@@ -53,7 +53,6 @@ def get_map(): ...@@ -53,7 +53,6 @@ def get_map():
c = time.time() c = time.time()
leve3_number, leve3_map = get_list(db, sql, 1+leve2_number+apps_number) leve3_number, leve3_map = get_list(db, sql, 1+leve2_number+apps_number)
print((time.time() - c) / 60) print((time.time() - c) / 60)
return apps_number, app_list_map,leve2_number, leve2_map,leve3_number, leve3_map return apps_number, app_list_map,leve2_number, leve2_map,leve3_number, leve3_map
...@@ -67,10 +66,16 @@ def get_unique(db,sql): ...@@ -67,10 +66,16 @@ def get_unique(db,sql):
print(len(v)) print(len(v))
return v return v
def con_sql(db, sql):
    """Execute *sql* on the open DB-API connection *db* and return the
    result set as a pandas DataFrame.

    Note: the connection is ALWAYS closed before returning (on success
    and on failure), so *db* cannot be reused by the caller.
    """
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            result = cursor.fetchall()
        finally:
            # Release the cursor even if the query fails.
            cursor.close()
        df = pd.DataFrame(list(result))
    finally:
        # Original code leaked the connection when execute() raised;
        # close it on every path.
        db.close()
    return df
def feature_engineer(): def feature_engineer():
apps_number, app_list_map, level2_number, level2_map, level3_number, level3_map = get_map() apps_number, app_list_map, level2_number, level2_map, level3_number, level3_map = get_map()
unique_values = [] unique_values = []
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct stat_date from esmm_train_data_dur" sql = "select distinct stat_date from esmm_train_data_dur"
...@@ -188,7 +193,7 @@ def feature_engineer(): ...@@ -188,7 +193,7 @@ def feature_engineer():
"tag1","tag2","tag3","tag4","tag5","tag6","tag7", "tag1","tag2","tag3","tag4","tag5","tag6","tag7",
"ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time", "ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time",
"hospital_id","treatment_method", "price_min", "price_max", "treatment_time", "hospital_id","treatment_method", "price_min", "price_max", "treatment_time",
"maintain_time","recover_time").rdd.coalesce(200)\ "maintain_time","recover_time").rdd.repartition(200)\
.map(lambda x: (x[0],float(x[1]),float(x[2]), .map(lambda x: (x[0],float(x[1]),float(x[2]),
app_list_func(x[3], app_list_map), app_list_func(x[4], level2_map), app_list_func(x[3], app_list_map), app_list_func(x[4], level2_map),
app_list_func(x[5], level3_map), app_list_func(x[6], level2_map), app_list_func(x[5], level3_map), app_list_func(x[6], level2_map),
...@@ -320,14 +325,6 @@ def get_predict(): ...@@ -320,14 +325,6 @@ def get_predict():
rdd.unpersist() rdd.unpersist()
def con_sql(db, sql):
    """Run *sql* against connection *db*, close the connection, and
    return the fetched rows as a pandas DataFrame."""
    cur = db.cursor()
    cur.execute(sql)
    rows = list(cur.fetchall())
    db.close()
    return pd.DataFrame(rows)
if __name__ == '__main__': if __name__ == '__main__':
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \ sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment