修改测试文件

38d63dc0 · 张彦钊 · 8662e492 · 38d63dc0 · 38d63dc0
Commit 38d63dc0 authored May 05, 2019 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 4 deletions

multi.py tensnsorflow/multi.py +6 -4

record.py tensnsorflow/record.py +3 -0

No files found.
--- a/tensnsorflow/multi.py
+++ b/tensnsorflow/multi.py
@@ -197,14 +197,16 @@ def con_sql(db,sql):
 def test():
+    sql = "select stat_date,cid_id,y,ccity_name from esmm_train_data limit 60"
+    rdd = spark.sql(sql).select("stat_date","cid_id","y","ccity_name").rdd
+    spark.createDataFrame(rdd).show(6)
    from hdfs import InsecureClient
    from hdfs.ext.dataframe import read_dataframe
    client = InsecureClient('http://nvwa01:50070')
+    df = read_dataframe(client,"/recommend/native/part-00199-f83757ab-9f64-4a2c-9f27-0b76df51c1c4-c000.avro")
-    df = read_dataframe(client,"/recommend/va/*.avro")
    print(df.head())
-    print(df.count())
+    # print(df.count())
    # spark.sql("use online")
    # spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar")

--- a/tensnsorflow/record.py
+++ b/tensnsorflow/record.py
@@ -28,6 +28,9 @@ def gen_tfrecords(in_file):
    from hdfs.ext.dataframe import read_dataframe
    client = InsecureClient('http://nvwa01:50070')
    df = read_dataframe(client,"/recommend/tr/part-00000-2f0d632b-0c61-4a0b-97d4-54bd5e579c5e-c000.avro")
+    df = df.rename({"app_list","level2_ids","level3_ids","stat_date","ucity_id", "ccity_name", "device_type", "manufacturer",
+                  "channel", "top", "time", "hospital_id","treatment_method", "price_min",
+                  "price_max", "treatment_time","maintain_time", "recover_time","y","z"})
    for i in range(df.shape[0]):
        feats = ["cid_id"]