Commit d94b9aa5 authored by 张彦钊

change test file

parent ce8cfc08
@@ -53,7 +53,6 @@ def get_map():
     c = time.time()
     leve3_number, leve3_map = get_list(db, sql, 1+leve2_number+apps_number)
     print((time.time() - c) / 60)
     return apps_number, app_list_map,leve2_number, leve2_map,leve3_number, leve3_map
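A hedged sketch of what get_list is assumed to do at this call site: run the query, collect the distinct values, and number them sequentially starting from the given offset so that the app-list, level-2 and level-3 vocabularies share one id space. The body below illustrates that assumption; it is not the repository's implementation.

# Assumed behaviour of get_list(db, sql, offset) -> (count, value-to-id map)
def get_list_sketch(db, sql, offset):
    cursor = db.cursor()
    cursor.execute(sql)
    values = [row[0] for row in cursor.fetchall()]
    # ids start at `offset` so they do not collide with earlier vocabularies
    value_map = {v: i + offset for i, v in enumerate(values)}
    return len(values), value_map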
@@ -67,10 +66,16 @@ def get_unique(db,sql):
     print(len(v))
     return v
+def con_sql(db,sql):
+    cursor = db.cursor()
+    cursor.execute(sql)
+    result = cursor.fetchall()
+    df = pd.DataFrame(list(result))
+    db.close()
+    return df
 def feature_engineer():
     apps_number, app_list_map, level2_number, level2_map, level3_number, level3_map = get_map()
     unique_values = []
     db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
     sql = "select distinct stat_date from esmm_train_data_dur"
@@ -188,7 +193,7 @@ def feature_engineer():
             "tag1","tag2","tag3","tag4","tag5","tag6","tag7",
             "ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time",
             "hospital_id","treatment_method", "price_min", "price_max", "treatment_time",
-            "maintain_time","recover_time").rdd.coalesce(200)\
+            "maintain_time","recover_time").rdd.repartition(200)\
         .map(lambda x: (x[0],float(x[1]),float(x[2]),
                         app_list_func(x[3], app_list_map), app_list_func(x[4], level2_map),
                         app_list_func(x[5], level3_map), app_list_func(x[6], level2_map),
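The one functional change in this hunk swaps coalesce(200) for repartition(200): coalesce only merges existing partitions without a shuffle and can never increase the partition count, while repartition always shuffles and rebalances the rows across exactly the requested number of partitions. A minimal, self-contained illustration (the local session and toy data are assumptions):

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[2]").appName("repartition_vs_coalesce").getOrCreate()
rdd = spark.sparkContext.parallelize(range(1000), 8)
print(rdd.coalesce(200).getNumPartitions())     # still 8: coalesce cannot grow the partition count
print(rdd.repartition(200).getNumPartitions())  # 200: full shuffle spreads rows evenly
spark.stop()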
@@ -320,14 +325,6 @@ def get_predict():
     rdd.unpersist()
-def con_sql(db,sql):
-    cursor = db.cursor()
-    cursor.execute(sql)
-    result = cursor.fetchall()
-    df = pd.DataFrame(list(result))
-    db.close()
-    return df
 if __name__ == '__main__':
     sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
......
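The __main__ block is cut off after the first SparkConf option. A hedged sketch of how such a conf is commonly turned into a Hive-enabled session (every builder option beyond the visible line is an assumption, not the repository's code):

from pyspark import SparkConf
from pyspark.sql import SparkSession

sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true")
spark = (SparkSession.builder
         .config(conf=sparkConf)
         .appName("esmm_feature_engineering")   # name assumed for illustration
         .enableHiveSupport()
         .getOrCreate())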