增加测试脚本

0e78fd85 · 张彦钊 · 849ab3f6 · 849ab3f6 · 0e78fd85
Commit 0e78fd85 authored Nov 13, 2018 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 123 additions and 8 deletions

prepareTestData.py local/prepareTestData.py +0 -8

test.py local/test.py +123 -0

No files found.
--- a/local/prepareTestData.py
+++ b/local/prepareTestData.py
-from prepareData import fetch_data
-def fetch_test_data():
-    exposure, click, click_device_ids = fetch_data(start_date='2018-08-06')
--- a/local/test.py
+++ b/local/test.py
+from __future__ import print_function
+from utils import con_sql
+import datetime
+import time
+import pymysql
+from pyspark.sql import SparkSession
+def fetch_data(start_date, end_date):
+    """Print a sample of the device ids found in the click table.
+
+    Runs a Spark SQL query against ``data_feed_click`` for rows whose
+    ``stat_date`` lies between ``start_date`` and ``end_date`` (both bounds
+    inclusive), drops rows containing nulls, de-duplicates the remainder,
+    collects the ``device_id`` column to the driver and prints the first two
+    collected rows.
+
+    Args:
+        start_date: Lower bound for ``stat_date``. It is formatted into the
+            SQL text inside single quotes; the caller in this file passes a
+            ``'YYYY-MM-DD'`` string.
+        end_date: Upper bound for ``stat_date``, handled the same way.
+
+    Returns:
+        None. The function only prints to stdout.
+    """
+    spark = SparkSession \
+        .builder \
+        .appName("get_data") \
+        .getOrCreate()
+    # NOTE(review): the bounds are interpolated directly into the SQL string,
+    # so only trusted values should be passed in.
+    sql = "select cid,device_id,stat_date from data_feed_click " \
+          "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
+    # ``distinct`` has to be called: without the parentheses ``df`` would be a
+    # bound method rather than a DataFrame and ``df.select`` below would raise
+    # AttributeError.
+    df = spark.sql(sql).na.drop().distinct()
+    device_id = df.select("device_id").collect()
+    # Progress message (Chinese): click-table data fetched successfully.
+    print("成功获取点击表里的数据")
+    # Show at most the first two collected rows as a quick sanity check.
+    print(device_id[0:2])
+# def hello(args):
+#     import tensorflow as tf
+#     from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
+#     from tensorflow.contrib.boosted_trees.proto import learner_pb2 as gbdt_learner
+#
+#     # Ignore all GPUs (current TF GBDT does not support GPU).
+#     import os
+#     os.environ["CUDA_VISIBLE_DEVICES"] = ""
+#
+#     # Import MNIST data
+#     # Set verbosity to display errors only (Remove this line for showing warnings)
+#     tf.logging.set_verbosity(tf.logging.ERROR)
+#     from tensorflow.examples.tutorials.mnist import input_data
+#     mnist = input_data.read_data_sets("/tmp/data/", one_hot=False,
+#                                       source_url='http://yann.lecun.com/exdb/mnist/')
+#
+#     # Parameters
+#     batch_size = 10000# The number of samples per batch
+#     num_classes = 10  # The 10 digits
+#     num_features = 784  # Each image is 28x28 pixels
+#     max_steps = 10000
+#
+#     # GBDT Parameters
+#     learning_rate = 0.1
+#     l1_regul = 0.
+#     l2_regul = 1.
+#     examples_per_layer = 1000
+#     num_trees = 10
+#     max_depth = 16
+#
+#     # Fill GBDT parameters into the config proto
+#     learner_config = gbdt_learner.LearnerConfig()
+#     learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
+#     learner_config.regularization.l1 = l1_regul
+#     learner_config.regularization.l2 = l2_regul / examples_per_layer
+#     learner_config.constraints.max_tree_depth = max_depth
+#     growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
+#     learner_config.growing_mode = growing_mode
+#     run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
+#     learner_config.multi_class_strategy = (
+#         gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN) \
+#  \
+#         # Create a TensorFlow GBDT Estimator
+#     gbdt_model = GradientBoostedDecisionTreeClassifier(
+#         model_dir=None,  # No save directory specified
+#         learner_config=learner_config,
+#         n_classes=num_classes,
+#         examples_per_layer=examples_per_layer,
+#         num_trees=num_trees,
+#         center_bias=False,
+#         config=run_config)
+#
+#     # Display TF info logs
+#     tf.logging.set_verbosity(tf.logging.INFO)
+#
+#     # Define the input function for training
+#     input_fn = tf.estimator.inputs.numpy_input_fn(
+#         x={'images': mnist.train.images}, y=mnist.train.labels,
+#         batch_size=batch_size, num_epochs=None, shuffle=True)
+#     # Train the Model
+#     gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)
+#
+#     # Evaluate the Model
+#     # Define the input function for evaluating
+#     input_fn = tf.estimator.inputs.numpy_input_fn(
+#         x={'images': mnist.test.images}, y=mnist.test.labels,
+#         batch_size=batch_size, shuffle=False)
+#     # Use the Estimator 'evaluate' method
+#     e = gbdt_model.evaluate(input_fn=input_fn)
+#
+#     print("Testing Accuracy:", e['accuracy'])
+if __name__ == "__main__":
+    # Script entry point: run the click-table fetch over a fixed date range.
+    date_range = ("2018-11-11", "2018-11-12")
+    fetch_data(*date_range)
+  # from pyspark.context import SparkContext
+  # from pyspark.conf import SparkConf
+  # from tensorflowonspark import TFCluster
+  # import argparse
+  #
+  # sc = SparkContext(conf=SparkConf().setAppName("mnist_spark"))
+  # executors = sc._conf.get("spark.executor.instances")
+  # num_executors = int(executors) if executors is not None else 1
+  #
+  # parser = argparse.ArgumentParser()
+  # parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
+  # parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors)
+  # parser.add_argument("--data_dir", help="path to MNIST data", default="MNIST-data")
+  # parser.add_argument("--model", help="path to save model/checkpoint", default="mnist_model")
+  # parser.add_argument("--num_ps", help="number of PS nodes in cluster", type=int, default=1)
+  # parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
+  # parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
+  #
+  # args = parser.parse_args()
+  # print("args:", args)
+  #
+  # cluster = TFCluster.run(sc, hello, args, args.cluster_size, args.num_ps, tensorboard=args.tensorboard, input_mode=TFCluster.InputMode.TENSORFLOW, log_dir=args.model, master_node='master')
+  # cluster.shutdown()