Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
P
physical
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
alpha
physical
Commits
2d7a880c
Commit
2d7a880c
authored
6 years ago
by
段英荣
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'similar_sort' into 'master'
Similar sort See merge request
!153
parents
236b7237
e51ef4f3
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
43 additions
and
19 deletions
+43
-19
table_scan.py
libs/table_scan.py
+22
-1
topic.py
search/utils/topic.py
+17
-17
trans2es_data2es_parallel.py
trans2es/management/commands/trans2es_data2es_parallel.py
+2
-0
topic.py
trans2es/models/topic.py
+2
-1
No files found.
libs/table_scan.py
View file @
2d7a880c
...
@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
...
@@ -5,6 +5,8 @@ from __future__ import unicode_literals, print_function, absolute_import
import
six
import
six
import
random
import
random
from
django.db
import
models
from
django.db
import
models
import
logging
import
traceback
class
ITableChunk
(
object
):
class
ITableChunk
(
object
):
...
@@ -147,13 +149,25 @@ class TableSlicerChunk(ITableChunk):
...
@@ -147,13 +149,25 @@ class TableSlicerChunk(ITableChunk):
class
TableSlicer
(
object
):
class
TableSlicer
(
object
):
def
__init__
(
self
,
queryset
,
chunk_size
=
None
,
chunk_count
=
None
,
sep_list
=
None
):
def
__init__
(
self
,
queryset
,
chunk_size
=
None
,
chunk_count
=
None
,
sep_list
=
None
):
try
:
logging
.
info
(
"duan add,before assert queryset"
)
assert
isinstance
(
queryset
,
models
.
QuerySet
)
assert
isinstance
(
queryset
,
models
.
QuerySet
)
logging
.
info
(
"duan add,before assert chunk_size"
)
assert
chunk_size
is
None
or
isinstance
(
chunk_size
,
six
.
integer_types
)
assert
chunk_size
is
None
or
isinstance
(
chunk_size
,
six
.
integer_types
)
logging
.
info
(
"duan add,before assert chunk_count"
)
assert
chunk_count
is
None
or
isinstance
(
chunk_count
,
six
.
integer_types
)
assert
chunk_count
is
None
or
isinstance
(
chunk_count
,
six
.
integer_types
)
logging
.
info
(
"duan add,before assert sep_list"
)
assert
sep_list
is
None
or
isinstance
(
sep_list
,
list
)
assert
sep_list
is
None
or
isinstance
(
sep_list
,
list
)
logging
.
info
(
"duan add,before assert chunk_size"
)
assert
(
chunk_size
is
not
None
)
+
(
chunk_count
is
not
None
)
+
(
sep_list
is
not
None
)
==
1
assert
(
chunk_size
is
not
None
)
+
(
chunk_count
is
not
None
)
+
(
sep_list
is
not
None
)
==
1
logging
.
info
(
"duan add,after assert chunk_size"
)
logging
.
info
(
"duan add,sep_list:
%
s"
%
str
(
sep_list
))
if
sep_list
is
not
None
:
if
sep_list
is
not
None
:
sep_list
=
list
(
sep_list
)
sep_list
=
list
(
sep_list
)
else
:
else
:
...
@@ -166,17 +180,24 @@ class TableSlicer(object):
...
@@ -166,17 +180,24 @@ class TableSlicer(object):
for
index
in
index_list
for
index
in
index_list
]
]
logging
.
info
(
"duan add,queryset count:
%
d"
%
count
)
self
.
_model
=
queryset
.
model
self
.
_model
=
queryset
.
model
self
.
_query
=
queryset
.
query
self
.
_query
=
queryset
.
query
self
.
_sep_list
=
[
None
]
+
sep_list
+
[
None
]
self
.
_sep_list
=
[
None
]
+
sep_list
+
[
None
]
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
def
chunks
(
self
):
def
chunks
(
self
):
try
:
reversed_sep_list
=
list
(
reversed
(
self
.
_sep_list
))
reversed_sep_list
=
list
(
reversed
(
self
.
_sep_list
))
logging
.
info
(
"duan add,reversed_sep_list:
%
d"
%
(
len
(
self
.
_sep_list
)
-
1
))
for
i
in
range
(
len
(
self
.
_sep_list
)
-
1
):
for
i
in
range
(
len
(
self
.
_sep_list
)
-
1
):
pk_start
=
reversed_sep_list
[
i
+
1
]
pk_start
=
reversed_sep_list
[
i
+
1
]
pk_stop
=
reversed_sep_list
[
i
]
pk_stop
=
reversed_sep_list
[
i
]
yield
TableSlicerChunk
(
model
=
self
.
_model
,
query
=
self
.
_query
,
pk_start
=
pk_start
,
pk_stop
=
pk_stop
)
yield
TableSlicerChunk
(
model
=
self
.
_model
,
query
=
self
.
_query
,
pk_start
=
pk_start
,
pk_stop
=
pk_stop
)
except
:
logging
.
error
(
"catch exception,err_msg:
%
s"
%
traceback
.
format_exc
())
class
TableStreamingSlicer
(
object
):
class
TableStreamingSlicer
(
object
):
...
...
This diff is collapsed.
Click to expand it.
search/utils/topic.py
View file @
2d7a880c
...
@@ -240,24 +240,24 @@ class TopicUtils(object):
...
@@ -240,24 +240,24 @@ class TopicUtils(object):
{
"term"
:
{
"has_image"
:
True
}},
{
"term"
:
{
"has_image"
:
True
}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_online"
:
True
}},
{
"term"
:
{
"is_deleted"
:
False
}}
{
"term"
:
{
"is_deleted"
:
False
}}
],
"should"
:
[
{
"bool"
:{
"must"
:[
{
"term"
:{
"has_image"
:
True
}},
{
"term"
:
{
"has_video"
:
False
}}
]
]
}
# "should": [
},{
# {
"bool"
:{
# "bool":{
"must"
:{
# "must":[
"term"
:{
"has_video"
:
True
}
# {"term":{"has_image":True}},
}
# {"term": {"has_video": False}}
}
# ]
}
# }
],
# },{
"minimum_should_match"
:
1
# "bool":{
# "must":{
# "term":{"has_video":True}
# }
# }
# }
# ],
# "minimum_should_match":1
}
}
},
},
"score_mode"
:
"sum"
,
"score_mode"
:
"sum"
,
...
...
This diff is collapsed.
Click to expand it.
trans2es/management/commands/trans2es_data2es_parallel.py
View file @
2d7a880c
...
@@ -116,8 +116,10 @@ class Command(BaseCommand):
...
@@ -116,8 +116,10 @@ class Command(BaseCommand):
type_info
=
get_type_info_map
()[
type_name
]
type_info
=
get_type_info_map
()[
type_name
]
query_set
=
type_info
.
queryset
query_set
=
type_info
.
queryset
logging
.
info
(
"before TableSlicer"
)
slicer
=
TableSlicer
(
queryset
=
query_set
,
chunk_size
=
type_info
.
bulk_insert_chunk_size
)
slicer
=
TableSlicer
(
queryset
=
query_set
,
chunk_size
=
type_info
.
bulk_insert_chunk_size
)
for
chunk
in
slicer
.
chunks
():
for
chunk
in
slicer
.
chunks
():
logging
.
info
(
"in chunks...."
)
job
=
Job
(
job
=
Job
(
sub_index_name
=
type_name
,
sub_index_name
=
type_name
,
type_name
=
type_name
,
type_name
=
type_name
,
...
...
This diff is collapsed.
Click to expand it.
trans2es/models/topic.py
View file @
2d7a880c
#!/usr/bin/env python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- coding: utf-8 -*-
from
__future__
import
unicode_literals
,
absolute_import
,
print_function
from
django.conf
import
settings
from
django.conf
import
settings
from
django.core.management.base
import
BaseCommand
,
CommandError
from
django.core.management.base
import
BaseCommand
,
CommandError
...
@@ -54,7 +55,7 @@ class Topic(models.Model):
...
@@ -54,7 +55,7 @@ class Topic(models.Model):
Group
,
verbose_name
=
u"关联的小组"
,
related_name
=
u"group_topics"
,
null
=
True
,
blank
=
True
,
default
=
None
,
Group
,
verbose_name
=
u"关联的小组"
,
related_name
=
u"group_topics"
,
null
=
True
,
blank
=
True
,
default
=
None
,
on_delete
=
models
.
CASCADE
)
on_delete
=
models
.
CASCADE
)
user_id
=
models
.
IntegerField
(
verbose_name
=
u'用户ID'
)
user_id
=
models
.
IntegerField
(
verbose_name
=
u'用户ID'
)
has_video
=
models
.
Integer
Field
(
verbose_name
=
u'是否是视频日记'
)
has_video
=
models
.
Boolean
Field
(
verbose_name
=
u'是否是视频日记'
)
drop_score
=
models
.
IntegerField
(
verbose_name
=
u'人工赋分'
,
default
=
0
)
drop_score
=
models
.
IntegerField
(
verbose_name
=
u'人工赋分'
,
default
=
0
)
description
=
models
.
CharField
(
verbose_name
=
u'日记本描述'
,
max_length
=
200
)
description
=
models
.
CharField
(
verbose_name
=
u'日记本描述'
,
max_length
=
200
)
content
=
models
.
CharField
(
verbose_name
=
u'日记本内容'
,
max_length
=
1000
)
content
=
models
.
CharField
(
verbose_name
=
u'日记本内容'
,
max_length
=
1000
)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment