shamashii
2017-11-21 22:36:24 +08:00
生成 110s,比较 120s,实验时感觉坑点竟然在于生成随机字符串效率,求改进
```
import timeit
def main():
import h5py, cyrandom
allchr = "".join((chr(i) for i in range(33,127)))
pspool = [[cyrandom.choice(allchr) for _ in range(cyrandom.randint(10, 100))] for x in range(100000)]
chunkl = []
for _ in range(5000000):
b1 = cyrandom.choice(pspool)
cyrandom.shuffle(b1)
chunkl.append(''.join(b1).encode('utf-8'))
f = h5py.File('h5.h5','w')
for k in range(50000000//5000000):
l = [str(k).encode('utf-8')]
# cyrandom.shuffle(chunkl)
print(k)
f.create_dataset(str(k), data=chunkl+l,)
del chunkl
f.close()
def query():
import h5py
f = h5py.File('h5.h5','a')
wbw = set(f['0'].value)
count = []
for k in f.keys():
print(k)
for x in f[k].value:
if x not in wbw:
count.append(x)
print(count)
f.close()
print(timeit.timeit(main, number=1))
print(timeit.timeit(query, number=1))
```