ML / ffm-baseline · Commits

Commit 0e78fd85
Authored Nov 13, 2018 by 张彦钊
Commit message: 增加测试脚本 (Add a test script)
Parent: 849ab3f6
Showing 2 changed files with 123 additions and 8 deletions:

    local/prepareTestData.py    +0    -8    (deleted)
    local/test.py               +123  -0    (new file)
local/prepareTestData.py (deleted, 100644 → 0)

from prepareData import fetch_data


def fetch_test_data():
    exposure, click, click_device_ids = fetch_data(start_date='2018-08-06')
local/test.py (new file, 0 → 100644)
from __future__ import print_function
from utils import con_sql
import datetime
import time
import pymysql
from pyspark.sql import SparkSession


def fetch_data(start_date, end_date):
    # Build (or reuse) a Spark session for the query.
    spark = SparkSession \
        .builder \
        .appName("get_data") \
        .getOrCreate()
    sql = "select cid,device_id,stat_date from data_feed_click " \
          "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
    # Drop rows containing nulls, then deduplicate.
    df = spark.sql(sql).na.drop().distinct()
    device_id = df.select("device_id").collect()
    print("Successfully fetched data from the click table")
    print(device_id[0:2])
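# A minimal aside on an assumption the query above relies on: spark.sql() can
# only resolve data_feed_click if the table is visible to this session, e.g.
# as a Hive table (add .enableHiveSupport() to the builder) or as a temp view
# registered beforehand. A hypothetical JDBC-backed registration, with
# placeholder connection details, could look like:
#
#   src = spark.read.jdbc(url="jdbc:mysql://host:3306/db",
#                         table="data_feed_click",
#                         properties={"user": "...", "password": "..."})
#   src.createOrReplaceTempView("data_feed_click")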
# def hello(args):
#     import tensorflow as tf
#     from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
#     from tensorflow.contrib.boosted_trees.proto import learner_pb2 as gbdt_learner
#
#     # Ignore all GPUs (current TF GBDT does not support GPU).
#     import os
#     os.environ["CUDA_VISIBLE_DEVICES"] = ""
#
#     # Import MNIST data
#     # Set verbosity to display errors only (Remove this line for showing warnings)
#     tf.logging.set_verbosity(tf.logging.ERROR)
#     from tensorflow.examples.tutorials.mnist import input_data
#     mnist = input_data.read_data_sets("/tmp/data/", one_hot=False,
#                                       source_url='http://yann.lecun.com/exdb/mnist/')
#
#     # Parameters
#     batch_size = 10000  # The number of samples per batch
#     num_classes = 10  # The 10 digits
#     num_features = 784  # Each image is 28x28 pixels
#     max_steps = 10000
#
#     # GBDT Parameters
#     learning_rate = 0.1
#     l1_regul = 0.
#     l2_regul = 1.
#     examples_per_layer = 1000
#     num_trees = 10
#     max_depth = 16
#
#     # Fill GBDT parameters into the config proto
#     learner_config = gbdt_learner.LearnerConfig()
#     learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
#     learner_config.regularization.l1 = l1_regul
#     learner_config.regularization.l2 = l2_regul / examples_per_layer
#     learner_config.constraints.max_tree_depth = max_depth
#     growing_mode = gbdt_learner.LearnerConfig.LAYER_BY_LAYER
#     learner_config.growing_mode = growing_mode
#     run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
#     learner_config.multi_class_strategy = (
#         gbdt_learner.LearnerConfig.DIAGONAL_HESSIAN)
#
#     # Create a TensorFlow GBDT Estimator
#     gbdt_model = GradientBoostedDecisionTreeClassifier(
#         model_dir=None,  # No save directory specified
#         learner_config=learner_config,
#         n_classes=num_classes,
#         examples_per_layer=examples_per_layer,
#         num_trees=num_trees,
#         center_bias=False,
#         config=run_config)
#
#     # Display TF info logs
#     tf.logging.set_verbosity(tf.logging.INFO)
#
#     # Define the input function for training
#     input_fn = tf.estimator.inputs.numpy_input_fn(
#         x={'images': mnist.train.images}, y=mnist.train.labels,
#         batch_size=batch_size, num_epochs=None, shuffle=True)
#     # Train the Model
#     gbdt_model.fit(input_fn=input_fn, max_steps=max_steps)
#
#     # Evaluate the Model
#     # Define the input function for evaluating
#     input_fn = tf.estimator.inputs.numpy_input_fn(
#         x={'images': mnist.test.images}, y=mnist.test.labels,
#         batch_size=batch_size, shuffle=False)
#     # Use the Estimator 'evaluate' method
#     e = gbdt_model.evaluate(input_fn=input_fn)
#
#     print("Testing Accuracy:", e['accuracy'])
if __name__ == "__main__":
    fetch_data("2018-11-11", "2018-11-12")
# from pyspark.context import SparkContext
# from pyspark.conf import SparkConf
# from tensorflowonspark import TFCluster
# import argparse
#
# sc = SparkContext(conf=SparkConf().setAppName("mnist_spark"))
# executors = sc._conf.get("spark.executor.instances")
# num_executors = int(executors) if executors is not None else 1
#
# parser = argparse.ArgumentParser()
# parser.add_argument("--batch_size", help="number of records per batch", type=int, default=100)
# parser.add_argument("--cluster_size", help="number of nodes in the cluster", type=int, default=num_executors)
# parser.add_argument("--data_dir", help="path to MNIST data", default="MNIST-data")
# parser.add_argument("--model", help="path to save model/checkpoint", default="mnist_model")
# parser.add_argument("--num_ps", help="number of PS nodes in cluster", type=int, default=1)
# parser.add_argument("--steps", help="maximum number of steps", type=int, default=1000)
# parser.add_argument("--tensorboard", help="launch tensorboard process", action="store_true")
#
# args = parser.parse_args()
# print("args:", args)
#
# cluster = TFCluster.run(sc, hello, args, args.cluster_size, args.num_ps, tensorboard=args.tensorboard, input_mode=TFCluster.InputMode.TENSORFLOW, log_dir=args.model, master_node='master')
# cluster.shutdown()
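test.py imports con_sql from utils and pymysql without using either, which suggests the same click table is also reachable over MySQL. The sketch below is an assumption-laden illustration (host, credentials, and database name are placeholders, not taken from the commit) of the equivalent fetch through pymysql; its parameterized query also sidesteps the str.format() interpolation used in fetch_data.

import pymysql


def fetch_data_mysql(start_date, end_date):
    # Placeholder connection settings; substitute the real host/user/database.
    conn = pymysql.connect(host="localhost", user="root", password="",
                           db="test", charset="utf8")
    try:
        with conn.cursor() as cursor:
            # Let the driver escape the dates instead of formatting them
            # into the SQL string.
            sql = ("select cid, device_id, stat_date from data_feed_click "
                   "where stat_date >= %s and stat_date <= %s")
            cursor.execute(sql, (start_date, end_date))
            rows = cursor.fetchall()
    finally:
        conn.close()
    return rows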