mirror of
https://github.com/phodal/github
synced 2026-05-22 00:29:47 +00:00
update analytics 2
This commit is contained in:
parent
e203c23929
commit
779e9652b6
3 changed files with 239 additions and 179 deletions
|
|
@ -3,25 +3,34 @@
|
|||
|
||||
让我们分析之前的程序,然后再想办法做出优化。网上看到一篇文章[http://www.huyng.com/posts/python-performance-analysis/](http://www.huyng.com/posts/python-performance-analysis/)讲的就是分析这部分内容的。
|
||||
|
||||
#time python分析#
|
||||
##time python分析
|
||||
|
||||
分析程序的运行时间
|
||||
|
||||
$time python handle.py
|
||||
```bash
|
||||
$time python handle.py
|
||||
```
|
||||
|
||||
结果便是,但是对于我们的分析没有一点意义
|
||||
|
||||
real 0m43.411s
|
||||
user 0m39.226s
|
||||
sys 0m0.618s
|
||||
```
|
||||
real 0m43.411s
|
||||
user 0m39.226s
|
||||
sys 0m0.618s
|
||||
```
|
||||
|
||||
##line_profiler python
|
||||
|
||||
#line_profiler python#
|
||||
这是
|
||||
##Mac OS X 10.9 line_profiler Install##
|
||||
|
||||
sudo ARCHFLAGS="-Wno-error=unused-command-line-argument-hard-error-in-future" easy_install line_profiler
|
||||
```bash
|
||||
sudo ARCHFLAGS="-Wno-error=unused-command-line-argument-hard-error-in-future" easy_install line_profiler
|
||||
```
|
||||
|
||||
然后在我们的``parse_data.py``的``handle_json``前面加上``@profile``
|
||||
<pre><code class="python">
|
||||
|
||||
```python
|
||||
@profile
|
||||
def handle_json(jsonfile):
|
||||
f = open(jsonfile, "r")
|
||||
|
|
@ -37,107 +46,127 @@ def handle_json(jsonfile):
|
|||
|
||||
f.close()
|
||||
return datacount, dataarray
|
||||
</pre></code>
|
||||
```
|
||||
|
||||
Line_profiler带了一个分析脚本``kernprof.py``,so
|
||||
|
||||
kernprof.py -l -v handle.py
|
||||
```bash
|
||||
kernprof.py -l -v handle.py
|
||||
```
|
||||
|
||||
我们便会得到下面的结果
|
||||
|
||||
```
|
||||
Wrote profile results to handle.py.lprof
|
||||
Timer unit: 1e-06 s
|
||||
|
||||
Wrote profile results to handle.py.lprof
|
||||
Timer unit: 1e-06 s
|
||||
File: parse_data.py
|
||||
Function: handle_json at line 15
|
||||
Total time: 127.332 s
|
||||
|
||||
File: parse_data.py
|
||||
Function: handle_json at line 15
|
||||
Total time: 127.332 s
|
||||
|
||||
Line # Hits Time Per Hit % Time Line Contents
|
||||
==============================================================
|
||||
15 @profile
|
||||
16 def handle_json(jsonfile):
|
||||
17 19 636 33.5 0.0 f = open(jsonfile, "r")
|
||||
18 19 21 1.1 0.0 dataarray = []
|
||||
19 19 16 0.8 0.0 datacount = 0
|
||||
20
|
||||
21 212373 730344 3.4 0.6 for line in open(jsonfile):
|
||||
22 212354 2826826 13.3 2.2 line = f.readline()
|
||||
23 212354 13848171 65.2 10.9 lin = json.loads(line)
|
||||
24 212354 109427317 515.3 85.9 date = dateutil.parser.parse(lin["created_at"])
|
||||
25 212354 238112 1.1 0.2 datacount += 1
|
||||
26 212354 260227 1.2 0.2 dataarray.append(date.minute)
|
||||
27
|
||||
28 19 349 18.4 0.0 f.close()
|
||||
29 19 20 1.1 0.0 return datacount, dataarray
|
||||
Line # Hits Time Per Hit % Time Line Contents
|
||||
==============================================================
|
||||
15 @profile
|
||||
16 def handle_json(jsonfile):
|
||||
17 19 636 33.5 0.0 f = open(jsonfile, "r")
|
||||
18 19 21 1.1 0.0 dataarray = []
|
||||
19 19 16 0.8 0.0 datacount = 0
|
||||
20
|
||||
21 212373 730344 3.4 0.6 for line in open(jsonfile):
|
||||
22 212354 2826826 13.3 2.2 line = f.readline()
|
||||
23 212354 13848171 65.2 10.9 lin = json.loads(line)
|
||||
24 212354 109427317 515.3 85.9 date = dateutil.parser.parse(lin["created_at"])
|
||||
25 212354 238112 1.1 0.2 datacount += 1
|
||||
26 212354 260227 1.2 0.2 dataarray.append(date.minute)
|
||||
27
|
||||
28 19 349 18.4 0.0 f.close()
|
||||
29 19 20 1.1 0.0 return datacount, dataarray
|
||||
```
|
||||
|
||||
于是我们就发现我们的瓶颈就是从读取``created_at``,即创建时间。。。以及解析json,反而不是我们关心的IO,果然``readline``很强大。
|
||||
|
||||
#memory_profiler python#
|
||||
##memory_profiler install##
|
||||
##memory_profiler python
|
||||
|
||||
$ pip install -U memory_profiler
|
||||
$ pip install psutil
|
||||
###memory_profiler install
|
||||
|
||||
```bash
|
||||
$ pip install -U memory_profiler
|
||||
$ pip install psutil
|
||||
```
|
||||
|
||||
###memory_profiler python
|
||||
|
||||
##memory_profiler python##
|
||||
如上,我们只需要在``handle_json``前面加上``@profile``
|
||||
|
||||
python -m memory_profiler handle.py
|
||||
```bash
|
||||
python -m memory_profiler handle.py
|
||||
```
|
||||
|
||||
于是
|
||||
|
||||
```
|
||||
Filename: parse_data.py
|
||||
|
||||
Line # Mem usage Increment Line Contents
|
||||
================================================
|
||||
13 39.930 MiB 0.000 MiB @profile
|
||||
14 def handle_json(jsonfile):
|
||||
15 39.930 MiB 0.000 MiB f = open(jsonfile, "r")
|
||||
16 39.930 MiB 0.000 MiB dataarray = []
|
||||
17 39.930 MiB 0.000 MiB datacount = 0
|
||||
18
|
||||
19 40.055 MiB 0.125 MiB for line in open(jsonfile):
|
||||
20 40.055 MiB 0.000 MiB line = f.readline()
|
||||
21 40.066 MiB 0.012 MiB lin = json.loads(line)
|
||||
22 40.055 MiB -0.012 MiB date = dateutil.parser.parse(lin["created_at"])
|
||||
23 40.055 MiB 0.000 MiB datacount += 1
|
||||
24 40.055 MiB 0.000 MiB dataarray.append(date.minute)
|
||||
25
|
||||
26 f.close()
|
||||
27 return datacount, dataarray
|
||||
```
|
||||
|
||||
Filename: parse_data.py
|
||||
|
||||
Line # Mem usage Increment Line Contents
|
||||
================================================
|
||||
13 39.930 MiB 0.000 MiB @profile
|
||||
14 def handle_json(jsonfile):
|
||||
15 39.930 MiB 0.000 MiB f = open(jsonfile, "r")
|
||||
16 39.930 MiB 0.000 MiB dataarray = []
|
||||
17 39.930 MiB 0.000 MiB datacount = 0
|
||||
18
|
||||
19 40.055 MiB 0.125 MiB for line in open(jsonfile):
|
||||
20 40.055 MiB 0.000 MiB line = f.readline()
|
||||
21 40.066 MiB 0.012 MiB lin = json.loads(line)
|
||||
22 40.055 MiB -0.012 MiB date = dateutil.parser.parse(lin["created_at"])
|
||||
23 40.055 MiB 0.000 MiB datacount += 1
|
||||
24 40.055 MiB 0.000 MiB dataarray.append(date.minute)
|
||||
25
|
||||
26 f.close()
|
||||
27 return datacount, dataarray
|
||||
##objgraph python
|
||||
|
||||
###objgraph install
|
||||
|
||||
#objgraph python#
|
||||
|
||||
##objgraph install##
|
||||
|
||||
pip install objgraph
|
||||
```bash
|
||||
pip install objgraph
|
||||
```
|
||||
|
||||
我们需要调用他
|
||||
|
||||
import pdb;
|
||||
```python
|
||||
import pdb;
|
||||
```
|
||||
|
||||
以及在需要调度的地方加上
|
||||
|
||||
pdb.set_trace()
|
||||
```python
|
||||
pdb.set_trace()
|
||||
```
|
||||
|
||||
接着会进入``command``模式
|
||||
|
||||
(pdb) import objgraph
|
||||
(pdb) objgraph.show_most_common_types()
|
||||
```python
|
||||
(pdb) import objgraph
|
||||
(pdb) objgraph.show_most_common_types()
|
||||
```
|
||||
|
||||
然后我们可以找到。。
|
||||
|
||||
function 8259
|
||||
dict 2137
|
||||
tuple 1949
|
||||
wrapper_descriptor 1625
|
||||
list 1586
|
||||
weakref 1145
|
||||
builtin_function_or_method 1117
|
||||
method_descriptor 948
|
||||
getset_descriptor 708
|
||||
type 705
|
||||
```
|
||||
function 8259
|
||||
dict 2137
|
||||
tuple 1949
|
||||
wrapper_descriptor 1625
|
||||
list 1586
|
||||
weakref 1145
|
||||
builtin_function_or_method 1117
|
||||
method_descriptor 948
|
||||
getset_descriptor 708
|
||||
type 705
|
||||
```
|
||||
|
||||
也可以用他生成图形,貌似这里是用``dot``生成的,加上``python-xdot``
|
||||
|
||||
|
|
@ -145,17 +174,20 @@ Line_profiler带了一个分析脚本``kernprof.py``,so
|
|||
|
||||
如果我们每次都要花同样的时间去做一件事,去扫那些数据的话,那么这是最好的打发时间的方法。
|
||||
|
||||
##python SQLite3 查询数据##
|
||||
##python SQLite3 查询数据
|
||||
|
||||
我们创建了一个名为``userdata.db``的数据库文件,然后创建了一个表,里面有owner,language,eventtype,name url
|
||||
|
||||
def init_db():
|
||||
conn = sqlite3.connect('userdata.db')
|
||||
c = conn.cursor()
|
||||
c.execute('''CREATE TABLE userinfo (owner text, language text, eventtype text, name text, url text)''')
|
||||
```python
|
||||
def init_db():
|
||||
conn = sqlite3.connect('userdata.db')
|
||||
c = conn.cursor()
|
||||
c.execute('''CREATE TABLE userinfo (owner text, language text, eventtype text, name text, url text)''')
|
||||
```
|
||||
|
||||
接着我们就可以查询数据,这里从结果讲起。
|
||||
|
||||
<pre><code class="python">
|
||||
```python
|
||||
def get_count(username):
|
||||
count = 0
|
||||
userinfo = []
|
||||
|
|
@ -165,11 +197,11 @@ def get_count(username):
|
|||
userinfo.append(zero)
|
||||
|
||||
return count, userinfo
|
||||
|
||||
</code></pre>
|
||||
```
|
||||
|
||||
当我查询``gmszone``的时候,也就是我自己就会有如下的结果
|
||||
<pre><code class="bash">
|
||||
|
||||
```bash
|
||||
(u'gmszone', u'ForkEvent', u'RESUME', u'TeX', u'https://github.com/gmszone/RESUME')
|
||||
(u'gmszone', u'WatchEvent', u'iot-dashboard', u'JavaScript', u'https://github.com/gmszone/iot-dashboard')
|
||||
(u'gmszone', u'PushEvent', u'wechat-wordpress', u'Ruby', u'https://github.com/gmszone/wechat-wordpress')
|
||||
|
|
@ -180,43 +212,53 @@ def get_count(username):
|
|||
(u'gmszone', u'PushEvent', u'iot-doc', u'TeX', u'https://github.com/gmszone/iot-doc')
|
||||
(u'gmszone', u'PushEvent', u'iot-doc', u'TeX', u'https://github.com/gmszone/iot-doc')
|
||||
109
|
||||
</pre></code>
|
||||
```
|
||||
|
||||
一共有109个事件,有``Watch``,``Create``,``Push``,``Fork``还有其他的,
|
||||
项目主要有``iot``,``RESUME``,``iot-dashboard``,``wechat-wordpress``,
|
||||
接着就是语言了,``TeX``,``JavaScript``,``Ruby``,接着就是项目的url了。
|
||||
|
||||
值得注意的是。
|
||||
<pre><code class="bash">
|
||||
|
||||
```bash
|
||||
-rw-r--r-- 1 fdhuang staff 905M Apr 12 14:59 userdata.db
|
||||
</code></pre>
|
||||
```
|
||||
|
||||
这个数据库文件有**905M**,不过查询结果相当让人满意,至少相对于原来的结果来说。
|
||||
|
||||
##Python SQLite3##
|
||||
##Python SQLite3
|
||||
|
||||
Python自带了对SQLite3的支持,然而我们还需要安装SQLite3
|
||||
|
||||
brew install sqlite3
|
||||
```bash
|
||||
brew install sqlite3
|
||||
```
|
||||
|
||||
或者是
|
||||
|
||||
sudo port install sqlite3
|
||||
|
||||
```bash
|
||||
sudo port install sqlite3
|
||||
```
|
||||
|
||||
或者是Ubuntu的
|
||||
|
||||
sudo apt-get install sqlite3
|
||||
```bash
|
||||
sudo apt-get install sqlite3
|
||||
```
|
||||
|
||||
openSUSE自然就是
|
||||
|
||||
sudo zypper install sqlite3
|
||||
```bash
|
||||
sudo zypper install sqlite3
|
||||
```
|
||||
|
||||
不过,用yast2也很不错,不是么。。
|
||||
|
||||
##Pythont Github Sqlite3数据导入##
|
||||
##Python Github Sqlite3数据导入
|
||||
|
||||
需要注意的是这里是需要python2.7,起源于对gzip的上下文管理器的支持问题
|
||||
|
||||
<pre><code class="python">
|
||||
```python
|
||||
def handle_gzip_file(filename):
|
||||
userinfo = []
|
||||
with gzip.GzipFile(filename) as f:
|
||||
|
|
@ -264,7 +306,7 @@ def build_db_with_gzip():
|
|||
|
||||
conn.commit()
|
||||
c.close()
|
||||
</code></pre>
|
||||
```
|
||||
|
||||
``executemany``可以插入多条数据,对于我们的数据来说,一小时的文件大概有五六千个会符合我们上面的安装,也就是有``actor``又有``type``才是我们需要记录的数据,我们只需要统计用户的那些事件,而非全部的事件。
|
||||
|
||||
|
|
@ -276,7 +318,9 @@ def build_db_with_gzip():
|
|||
|
||||
首先是正则匹配
|
||||
|
||||
date_re = re.compile(r"([0-9]{4})-([0-9]{2})-([0-9]{2})-([0-9]+)\.json.gz")
|
||||
```python
|
||||
date_re = re.compile(r"([0-9]{4})-([0-9]{2})-([0-9]{2})-([0-9]+)\.json.gz")
|
||||
```
|
||||
|
||||
不过主要的还是在于``glob.glob``
|
||||
|
||||
|
|
@ -290,7 +334,7 @@ def build_db_with_gzip():
|
|||
|
||||
更好的方案?
|
||||
|
||||
###redis###
|
||||
###redis
|
||||
|
||||
结合了前面两篇我们终于可以成功地读取出用户数据、处理,再接着可以找相近的用户。
|
||||
|
||||
|
|
@ -298,30 +342,36 @@ def build_db_with_gzip():
|
|||
|
||||
查询用户事件总数
|
||||
|
||||
import redis
|
||||
r = redis.StrictRedis(host='localhost', port=6379, db=0)
|
||||
pipe = pipe = r.pipeline()
|
||||
pipe.zscore('osrc:user',"gmszone")
|
||||
pipe.execute()
|
||||
```python
|
||||
import redis
|
||||
r = redis.StrictRedis(host='localhost', port=6379, db=0)
|
||||
pipe = pipe = r.pipeline()
|
||||
pipe.zscore('osrc:user',"gmszone")
|
||||
pipe.execute()
|
||||
```
|
||||
|
||||
系统返回了``227.0``,试试别人。
|
||||
|
||||
>>> pipe.zscore('osrc:user',"dfm")
|
||||
<redis.client.StrictPipeline object at 0x104fa7f50>
|
||||
>>> pipe.execute()
|
||||
[425.0]
|
||||
>>>
|
||||
```python
|
||||
>>> pipe.zscore('osrc:user',"dfm")
|
||||
<redis.client.StrictPipeline object at 0x104fa7f50>
|
||||
>>> pipe.execute()
|
||||
[425.0]
|
||||
>>>
|
||||
```
|
||||
|
||||
看看主要是在哪一天提交的
|
||||
|
||||
>>> pipe.hgetall('osrc:user:gmszone:day')
|
||||
<redis.client.StrictPipeline object at 0x104fa7f50>
|
||||
>>> pipe.execute()
|
||||
[{'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}]
|
||||
```python
|
||||
>>> pipe.hgetall('osrc:user:gmszone:day')
|
||||
<redis.client.StrictPipeline object at 0x104fa7f50>
|
||||
>>> pipe.execute()
|
||||
[{'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}]
|
||||
```
|
||||
|
||||
结果大致如下图所示:
|
||||
|
||||
![SMTWTFS][1]
|
||||

|
||||
|
||||
看看主要的事件是?
|
||||
|
||||
|
|
@ -331,17 +381,17 @@ def build_db_with_gzip():
|
|||
[[('PushEvent', 154.0), ('CreateEvent', 41.0), ('WatchEvent', 18.0), ('GollumEvent', 8.0), ('MemberEvent', 3.0), ('ForkEvent', 2.0), ('ReleaseEvent', 1.0)]]
|
||||
>>>
|
||||
|
||||
![Main Event][2]
|
||||

|
||||
|
||||
蓝色的就是push事件,黄色的是create等等。
|
||||
|
||||
到这里我们算是知道了OSRC的数据库部分是如何工作的。
|
||||
|
||||
##Python redis 查询
|
||||
###Python redis 查询
|
||||
|
||||
主要代码如下所示
|
||||
|
||||
<pre><code class="python">
|
||||
```python
|
||||
def get_vector(user, pipe=None):
|
||||
|
||||
r = redis.StrictRedis(host='localhost', port=6379, db=0)
|
||||
|
|
@ -364,19 +414,20 @@ def get_vector(user, pipe=None):
|
|||
|
||||
if no_pipe:
|
||||
return pipe.execute()
|
||||
</code></pre>
|
||||
```
|
||||
|
||||
结果在上一篇中显示出来了,也就是
|
||||
|
||||
[227.0, {'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}, [('PushEvent', 154.0), ('CreateEvent', 41.0), ('WatchEvent', 18.0), ('GollumEvent', 8.0), ('MemberEvent', 3.0), ('ForkEvent', 2.0), ('ReleaseEvent', 1.0)], 0, 0, 0, 11, [('CSS', 74.0), ('JavaScript', 60.0), ('Ruby', 12.0), ('TeX', 6.0), ('Python', 6.0), ('Java', 5.0), ('C++', 5.0), ('Assembly', 5.0), ('C', 3.0), ('Emacs Lisp', 2.0), ('Arduino', 2.0)]]
|
||||
```
|
||||
[227.0, {'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}, [('PushEvent', 154.0), ('CreateEvent', 41.0), ('WatchEvent', 18.0), ('GollumEvent', 8.0), ('MemberEvent', 3.0), ('ForkEvent', 2.0), ('ReleaseEvent', 1.0)], 0, 0, 0, 11, [('CSS', 74.0), ('JavaScript', 60.0), ('Ruby', 12.0), ('TeX', 6.0), ('Python', 6.0), ('Java', 5.0), ('C++', 5.0), ('Assembly', 5.0), ('C', 3.0), ('Emacs Lisp', 2.0), ('Arduino', 2.0)]]
|
||||
```
|
||||
|
||||
有意思的是在这里生成了和自己相近的人
|
||||
|
||||
['alesdokshanin', 'hjiawei', 'andrewreedy', 'christj6', '1995eaton']
|
||||
```
|
||||
['alesdokshanin', 'hjiawei', 'andrewreedy', 'christj6', '1995eaton']
|
||||
```
|
||||
|
||||
[1]: https://www.phodal.com/static/media/uploads/screen_shot_2014-04-15_at_8.11.14_pm.png
|
||||
[2]: https://www.phodal.com/static/media/uploads/screen_shot_2014-04-15_at_8.14.52_pm.png
|
||||
|
||||
osrc最有意思的一部分莫过于flann,当然说的也是系统后台的设计的一个很关键及有意思的部分。
|
||||
|
||||
##Python Github
|
||||
|
|
@ -386,20 +437,24 @@ osrc最有意思的一部分莫过于flann,当然说的也是系统后台的
|
|||
|
||||
换句话说,我们需要一些样本来当作我们的分析资料,这里用到的就是我们之前得到的结果。
|
||||
|
||||
[227.0, {'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}, [('PushEvent', 154.0), ('CreateEvent', 41.0), ('WatchEvent', 18.0), ('GollumEvent', 8.0), ('MemberEvent', 3.0), ('ForkEvent', 2.0), ('ReleaseEvent', 1.0)], 0, 0, 0, 11, [('CSS', 74.0), ('JavaScript', 60.0), ('Ruby', 12.0), ('TeX', 6.0), ('Python', 6.0), ('Java', 5.0), ('C++', 5.0), ('Assembly', 5.0), ('C', 3.0), ('Emacs Lisp', 2.0), ('Arduino', 2.0)]]
|
||||
```
|
||||
[227.0, {'1': '51', '0': '41', '3': '17', '2': '34', '5': '28', '4': '22', '6': '34'}, [('PushEvent', 154.0), ('CreateEvent', 41.0), ('WatchEvent', 18.0), ('GollumEvent', 8.0), ('MemberEvent', 3.0), ('ForkEvent', 2.0), ('ReleaseEvent', 1.0)], 0, 0, 0, 11, [('CSS', 74.0), ('JavaScript', 60.0), ('Ruby', 12.0), ('TeX', 6.0), ('Python', 6.0), ('Java', 5.0), ('C++', 5.0), ('Assembly', 5.0), ('C', 3.0), ('Emacs Lisp', 2.0), ('Arduino', 2.0)]]
|
||||
```
|
||||
|
||||
在代码中是构建了一个points.h5的文件来分析每个用户的points,之后再记录到hdf5文件中。
|
||||
|
||||
[ 0.00438596 0.18061674 0.2246696 0.14977974 0.07488987 0.0969163
|
||||
0.12334802 0.14977974 0. 0.18061674 0. 0. 0.
|
||||
0.00881057 0. 0. 0.03524229 0. 0.
|
||||
0.01321586 0. 0. 0. 0.6784141 0.
|
||||
0.07929515 0.00440529 1. 1. 1. 0.08333333
|
||||
0.26431718 0.02202643 0.05286344 0.02643172 0. 0.01321586
|
||||
0.02202643 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0.00881057 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0.00881057]
|
||||
```
|
||||
[ 0.00438596 0.18061674 0.2246696 0.14977974 0.07488987 0.0969163
|
||||
0.12334802 0.14977974 0. 0.18061674 0. 0. 0.
|
||||
0.00881057 0. 0. 0.03524229 0. 0.
|
||||
0.01321586 0. 0. 0. 0.6784141 0.
|
||||
0.07929515 0.00440529 1. 1. 1. 0.08333333
|
||||
0.26431718 0.02202643 0.05286344 0.02643172 0. 0.01321586
|
||||
0.02202643 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0.00881057 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0.00881057]
|
||||
```
|
||||
|
||||
这里分析到用户的大部分行为,再找到与其行为相近的用户,主要的行为有下面这些:
|
||||
|
||||
|
|
@ -410,62 +465,67 @@ osrc最有意思的一部分莫过于flann,当然说的也是系统后台的
|
|||
|
||||
osrc中用于解析的代码
|
||||
|
||||
```python
|
||||
def parse_vector(results):
|
||||
points = np.zeros(nvector)
|
||||
total = int(results[0])
|
||||
|
||||
def parse_vector(results):
|
||||
points = np.zeros(nvector)
|
||||
total = int(results[0])
|
||||
points[0] = 1.0 / (total + 1)
|
||||
|
||||
points[0] = 1.0 / (total + 1)
|
||||
# Week means.
|
||||
for k, v in results[1].iteritems():
|
||||
points[1 + int(k)] = float(v) / total
|
||||
|
||||
# Week means.
|
||||
for k, v in results[1].iteritems():
|
||||
points[1 + int(k)] = float(v) / total
|
||||
# Event types.
|
||||
n = 8
|
||||
for k, v in results[2]:
|
||||
points[n + evttypes.index(k)] = float(v) / total
|
||||
|
||||
# Event types.
|
||||
n = 8
|
||||
for k, v in results[2]:
|
||||
points[n + evttypes.index(k)] = float(v) / total
|
||||
# Number of contributions, connections and languages.
|
||||
n += nevts
|
||||
points[n] = 1.0 / (float(results[3]) + 1)
|
||||
points[n + 1] = 1.0 / (float(results[4]) + 1)
|
||||
points[n + 2] = 1.0 / (float(results[5]) + 1)
|
||||
points[n + 3] = 1.0 / (float(results[6]) + 1)
|
||||
|
||||
# Number of contributions, connections and languages.
|
||||
n += nevts
|
||||
points[n] = 1.0 / (float(results[3]) + 1)
|
||||
points[n + 1] = 1.0 / (float(results[4]) + 1)
|
||||
points[n + 2] = 1.0 / (float(results[5]) + 1)
|
||||
points[n + 3] = 1.0 / (float(results[6]) + 1)
|
||||
# Top languages.
|
||||
n += 4
|
||||
for k, v in results[7]:
|
||||
if k in langs:
|
||||
points[n + langs.index(k)] = float(v) / total
|
||||
else:
|
||||
# Unknown language.
|
||||
points[-1] = float(v) / total
|
||||
|
||||
# Top languages.
|
||||
n += 4
|
||||
for k, v in results[7]:
|
||||
if k in langs:
|
||||
points[n + langs.index(k)] = float(v) / total
|
||||
else:
|
||||
# Unknown language.
|
||||
points[-1] = float(v) / total
|
||||
|
||||
return points
|
||||
return points
|
||||
```
|
||||
|
||||
这样也就返回我们需要的点数,然后我们可以用``get_points``来获取这些
|
||||
|
||||
def get_points(usernames):
|
||||
r = redis.StrictRedis(host='localhost', port=6379, db=0)
|
||||
pipe = r.pipeline()
|
||||
```python
|
||||
def get_points(usernames):
|
||||
r = redis.StrictRedis(host='localhost', port=6379, db=0)
|
||||
pipe = r.pipeline()
|
||||
|
||||
results = get_vector(usernames)
|
||||
points = np.zeros([len(usernames), nvector])
|
||||
points = parse_vector(results)
|
||||
return points
|
||||
results = get_vector(usernames)
|
||||
points = np.zeros([len(usernames), nvector])
|
||||
points = parse_vector(results)
|
||||
return points
|
||||
```
|
||||
|
||||
就会得到我们的相应的数据,接着找找和自己邻近的,看看结果。
|
||||
|
||||
[ 0.01298701 0.19736842 0. 0.30263158 0.21052632 0.19736842
|
||||
0. 0.09210526 0. 0.22368421 0.01315789 0. 0.
|
||||
0. 0. 0. 0.01315789 0. 0.
|
||||
0.01315789 0. 0. 0. 0.73684211 0. 0.
|
||||
0. 1. 1. 1. 0.2 0.42105263
|
||||
0.09210526 0. 0. 0. 0. 0.23684211
|
||||
0. 0. 0.03947368 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. ]
|
||||
```
|
||||
[ 0.01298701 0.19736842 0. 0.30263158 0.21052632 0.19736842
|
||||
0. 0.09210526 0. 0.22368421 0.01315789 0. 0.
|
||||
0. 0. 0. 0.01315789 0. 0.
|
||||
0.01315789 0. 0. 0. 0.73684211 0. 0.
|
||||
0. 1. 1. 1. 0.2 0.42105263
|
||||
0.09210526 0. 0. 0. 0. 0.23684211
|
||||
0. 0. 0.03947368 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. 0. 0. 0.
|
||||
0. 0. 0. 0. ]
|
||||
```
|
||||
|
||||
真看不出来两者有什么相似的地方 。。。。
|
||||
|
|
|
|||
BIN
img/main-events.png
Normal file
BIN
img/main-events.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 18 KiB |
BIN
img/smtwtfs.png
Normal file
BIN
img/smtwtfs.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 8.3 KiB |
Loading…
Reference in a new issue