Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
crawler
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Chengyang Zhong
crawler
Commits
8c12e7b1
Commit
8c12e7b1
authored
Jan 01, 2021
by
haowang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
88a69b79
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
31 additions
and
33 deletions
+31
-33
.spider.py.swp
tasks/zhihu/.spider.py.swp
+0
-0
orcmodel.py
tasks/zhihu/captcha/orcmodel.py
+2
-3
utils.py
tasks/zhihu/captcha/utils.py
+25
-26
zhihu_captcha.py
tasks/zhihu/captcha/zhihu_captcha.py
+2
-3
spider.py
tasks/zhihu/spider.py
+2
-1
No files found.
tasks/zhihu/.spider.py.swp
0 → 100644
View file @
8c12e7b1
File added
tasks/zhihu/captcha/o
cr
model.py
→
tasks/zhihu/captcha/o
rc
model.py
View file @
8c12e7b1
import
tensorflow
as
tf
from
zhihu_
captcha
import
utils
from
captcha
import
utils
from
tensorflow.python.training
import
moving_averages
...
...
@@ -225,4 +225,4 @@ class LSTMOCR(object):
ksize
=
[
1
,
ksize
,
ksize
,
1
],
strides
=
[
1
,
strides
,
strides
,
1
],
padding
=
'SAME'
,
name
=
'max_pool'
)
\ No newline at end of file
name
=
'max_pool'
)
tasks/zhihu/captcha/utils.py
View file @
8c12e7b1
...
...
@@ -9,38 +9,38 @@ num_classes = 38#3 + 2 + 10 + 1 + 1
maxPrintLen
=
100
tf
.
compat
.
v1
.
flags
.
DEFINE_boolean
(
'restore'
,
True
,
'whether to restore from the latest checkpoint'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'checkpoint_dir'
,
'./checkpoint/'
,
'the checkpoint dir'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_float
(
'initial_learning_rate'
,
1e-3
,
'inital lr'
)
tf
.
app
.
flags
.
DEFINE_boolean
(
'restore'
,
True
,
'whether to restore from the latest checkpoint'
)
tf
.
app
.
flags
.
DEFINE_string
(
'checkpoint_dir'
,
'./checkpoint/'
,
'the checkpoint dir'
)
tf
.
app
.
flags
.
DEFINE_float
(
'initial_learning_rate'
,
1e-3
,
'inital lr'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'image_height'
,
60
,
'image height'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'image_width'
,
150
,
'image width'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'image_channel'
,
1
,
'image channels as input'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'image_height'
,
60
,
'image height'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'image_width'
,
150
,
'image width'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'image_channel'
,
1
,
'image channels as input'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'max_stepsize'
,
64
,
'max stepsize in lstm, as well as '
tf
.
app
.
flags
.
DEFINE_integer
(
'max_stepsize'
,
64
,
'max stepsize in lstm, as well as '
'the output channels of last layer in CNN'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'num_hidden'
,
128
,
'number of hidden units in lstm'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'num_epochs'
,
1000
,
'maximum epochs'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'batch_size'
,
128
,
'the batch_size'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'save_steps'
,
500
,
'the step to save checkpoint'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'validation_steps'
,
500
,
'the step to validation'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'num_hidden'
,
128
,
'number of hidden units in lstm'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'num_epochs'
,
1000
,
'maximum epochs'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'batch_size'
,
128
,
'the batch_size'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'save_steps'
,
500
,
'the step to save checkpoint'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'validation_steps'
,
500
,
'the step to validation'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_float
(
'decay_rate'
,
0.98
,
'the lr decay rate'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_float
(
'beta1'
,
0.9
,
'parameter of adam optimizer beta1'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_float
(
'beta2'
,
0.999
,
'adam parameter beta2'
)
tf
.
app
.
flags
.
DEFINE_float
(
'decay_rate'
,
0.98
,
'the lr decay rate'
)
tf
.
app
.
flags
.
DEFINE_float
(
'beta1'
,
0.9
,
'parameter of adam optimizer beta1'
)
tf
.
app
.
flags
.
DEFINE_float
(
'beta2'
,
0.999
,
'adam parameter beta2'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'decay_steps'
,
1000
,
'the lr decay_step for optimizer'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_float
(
'momentum'
,
0.9
,
'the momentum'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'decay_steps'
,
1000
,
'the lr decay_step for optimizer'
)
tf
.
app
.
flags
.
DEFINE_float
(
'momentum'
,
0.9
,
'the momentum'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'train_dir'
,
'./imgs/train/'
,
'the train data dir'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'val_dir'
,
'./imgs/val/'
,
'the val data dir'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'infer_dir'
,
'./imgs/infer/'
,
'the infer data dir'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'logs_dir'
,
'./log'
,
'the logging dir'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_string
(
'mode'
,
'train'
,
'train, val or infer'
)
tf
.
compat
.
v1
.
flags
.
DEFINE_integer
(
'num_gpus'
,
1
,
'num of gpus'
)
tf
.
app
.
flags
.
DEFINE_string
(
'train_dir'
,
'./imgs/train/'
,
'the train data dir'
)
tf
.
app
.
flags
.
DEFINE_string
(
'val_dir'
,
'./imgs/val/'
,
'the val data dir'
)
tf
.
app
.
flags
.
DEFINE_string
(
'infer_dir'
,
'./imgs/infer/'
,
'the infer data dir'
)
tf
.
app
.
flags
.
DEFINE_string
(
'logs_dir'
,
'./log'
,
'the logging dir'
)
tf
.
app
.
flags
.
DEFINE_string
(
'mode'
,
'train'
,
'train, val or infer'
)
tf
.
app
.
flags
.
DEFINE_integer
(
'num_gpus'
,
1
,
'num of gpus'
)
FLAGS
=
tf
.
compat
.
v1
.
flags
.
FLAGS
FLAGS
=
tf
.
app
.
flags
.
FLAGS
# num_batches_per_epoch = int(num_train_samples/FLAGS.batch_size)
...
...
@@ -171,4 +171,4 @@ def sparse_tuple_from_label(sequences, dtype=np.int32):
values
=
np
.
asarray
(
values
,
dtype
=
dtype
)
shape
=
np
.
asarray
([
len
(
sequences
),
np
.
asarray
(
indices
)
.
max
(
0
)[
1
]
+
1
],
dtype
=
np
.
int64
)
return
indices
,
values
,
shape
\ No newline at end of file
return
indices
,
values
,
shape
tasks/zhihu/captcha/zhihu_captcha.py
View file @
8c12e7b1
...
...
@@ -10,7 +10,7 @@ import webbrowser
from
io
import
BytesIO
from
captcha
import
utils
from
captcha
import
orcmodel
import
tensorflow
.compat.v1
as
tf
import
tensorflow
as
tf
from
PIL
import
Image
import
numpy
as
np
...
...
@@ -70,4 +70,4 @@ class ZhihuCaptcha():
expression
+=
''
else
:
expression
+=
utils
.
decode_maps
[
i
]
return
expression
\ No newline at end of file
return
expression
tasks/zhihu/spider.py
View file @
8c12e7b1
...
...
@@ -27,7 +27,6 @@ class Spider(object):
'''
初始化数据库,调整js规则
'''
self
.
login_req
=
self
.
_login
()
self
.
conn
=
pymysql
.
connect
(
host
=
HOST
,
port
=
PORT
,
user
=
USER
,
passwd
=
PASSWD
,
db
=
DB
,
charset
=
'utf8'
)
...
...
@@ -53,6 +52,8 @@ class Spider(object):
self
.
session
=
requests
.
session
()
self
.
HTMLSession
=
HTMLSession
()
self
.
login_req
=
self
.
_login
()
def
_login
(
self
):
url
=
'https://www.zhihu.com'
loginUrl
=
'https://www.zhihu.com/login/email'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment