Skip to content

Commit

Permalink
repo-sync-2024-03-18T20:32:30+0800 (#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
6fj authored Mar 18, 2024
1 parent 6cd7aee commit e7d45d4
Show file tree
Hide file tree
Showing 32 changed files with 395 additions and 235 deletions.
8 changes: 4 additions & 4 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1.
2.
3.
4.
1.
2.
3.
4.

**Expected behavior**
A clear and concise description of what you expected to happen.
Expand Down
5 changes: 5 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
> - `[API]` prefix for API changes.
> - `[Improvement]` prefix for implementation improvement.
## v0.3.0beta
- [Improvement] add uuid in system temp folder.
- [Improvement] use arrow csv reader in pir.
- [Bugfix] fix typo in psi config check.

## v0.3.0.dev240304
- [API] expose ic_mode in RunLegacyPsi api

Expand Down
4 changes: 2 additions & 2 deletions examples/psi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ $ bazel build //psi:main -c opt
2. Generate test data

```bash
$ python examples/psi/test_data_generator.py --receiver_item_cnt 1e6 \
$ python examples/psi/generate_psi_data.py --receiver_item_cnt 1e6 \
--sender_item_cnt 1e6 --intersection_cnt 8e4 --id_cnt 2 \
--receiver_path /tmp/receiver_input.csv \
--sender_path /tmp/sender_input.csv \
Expand Down Expand Up @@ -56,7 +56,7 @@ $ bazel build //psi:main -c opt
2. Generate test data

```bash
$ python examples/psi/test_data_generator.py --receiver_item_cnt 1e3 \
$ python examples/psi/generate_psi_data.py --receiver_item_cnt 1e3 \
--sender_item_cnt 1e6 --intersection_cnt 1e2 --id_cnt 2 \
--receiver_path /tmp/client_input.csv \
--sender_path /tmp/server_input.csv \
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_client_full.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"path": "/tmp/client_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"cache_path": "/tmp/client_cache"
},
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_client_online.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
"path": "/tmp/client_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"cache_path": "/tmp/client_cache"
},
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_receiver_inner_join.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/ecdh_receiver_inner_join_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/ecdh_receiver_inner_join.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_receiver_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/ecdh_receiver_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/ecdh_receiver_recovery.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_sender_inner_join.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/ecdh_sender_inner_join_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/ecdh_sender_inner_join.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_sender_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/ecdh_sender_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/ecdh_sender_recovery.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_server_full.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
"path": "/tmp/server_input.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
]
},
"link_config": {
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_server_offline.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
"path": "/tmp/server_input.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"server_secret_key_path": "/tmp/server_secret_key.key"
},
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/ecdh_server_offline_gen_cache.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"path": "/tmp/server_input.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"server_secret_key_path": "/tmp/server_secret_key.key",
"cache_path": "/tmp/server_cache.sf"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/kkrt_receiver_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/kkrt_receiver_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/kkrt_receiver_recovery.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/kkrt_sender_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
"path": "/tmp/kkrt_sender_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/kkrt_sender_recovery.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/rr22_receiver_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
"path": "/tmp/rr22_receiver_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/rr22_receiver_recovery.trace"
Expand Down
4 changes: 2 additions & 2 deletions examples/psi/config/rr22_sender_recovery.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
"path": "/tmp/rr22_sender_recovery_output.csv"
},
"keys": [
"id0",
"id1"
"id_0",
"id_1"
],
"debug_options": {
"trace_path": "/tmp/rr22_sender_recovery.trace"
Expand Down
193 changes: 193 additions & 0 deletions examples/psi/generate_psi_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# Copyright 2023 Ant Group Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse
import csv
import uuid

import pyarrow as pa
import pyarrow.csv as csv

batch_size = 10000


def create_table(id_cnt: int, label_cnt: int):
    """Build a one-row template table with the id and label columns.

    The columns are named ``id_0 .. id_{id_cnt-1}`` followed by
    ``label_0 .. label_{label_cnt-1}``. Every column is a string column
    holding a single random ``uuid4`` hex value.

    Args:
        id_cnt: Number of id columns; must be non-negative.
        label_cnt: Number of label columns; must be non-negative.

    Returns:
        The table produced by ``pa.table`` for those columns.
    """
    assert id_cnt >= 0 and label_cnt >= 0

    column_names = []
    columns = []
    # Id columns come first, then label columns.
    for prefix, count in (("id", id_cnt), ("label", label_cnt)):
        for col in range(count):
            column_names.append(f"{prefix}_{col}")
            columns.append(pa.array([uuid.uuid4().hex], type=pa.string()))

    return pa.table(columns, names=column_names)


def _write_rows(table_writer, column_names, id_cnt, label_cnt, row_cnt, id_prefix):
    """Write ``row_cnt`` generated rows to ``table_writer`` in batches.

    Rows are emitted in chunks of at most ``batch_size`` (module-level).
    For row index ``i`` the id column ``idx`` holds
    ``f"{id_prefix}_{idx}_{i}"``; every label cell is a fresh
    ``uuid.uuid4().hex``.
    """
    written = 0
    while written < row_cnt:
        chunk = min(batch_size, row_cnt - written)
        row_ids = range(written, written + chunk)

        arrays = [
            pa.array(
                [f"{id_prefix}_{idx}_{i}" for i in row_ids],
                type=pa.string(),
            )
            for idx in range(id_cnt)
        ]
        arrays.extend(
            pa.array([uuid.uuid4().hex for _ in row_ids], type=pa.string())
            for _ in range(label_cnt)
        )

        table_writer.write_table(pa.table(arrays, names=column_names))
        written += chunk


def write_table(
    id_cnt: int,
    label_cnt: int,
    intersection_cnt: int,
    difference_cnt: int,
    output_path: str,
    difference_id_prefix: str,
):
    """Write a CSV of generated ids (and random labels) to ``output_path``.

    The file first receives ``intersection_cnt`` rows whose ids are
    ``i_{column}_{row}``, then ``difference_cnt`` rows whose ids are
    ``{difference_id_prefix}_{column}_{row}``. Label cells are random
    ``uuid4`` hex strings.

    Args:
        id_cnt: Number of id columns; must be non-negative.
        label_cnt: Number of label columns; must be non-negative.
        intersection_cnt: Number of rows with the ``i`` prefix; must be
            positive.
        difference_cnt: Number of rows with ``difference_id_prefix``; must be
            non-negative.
        output_path: Destination CSV path.
        difference_id_prefix: Prefix for the difference rows; must be
            non-empty when ``difference_cnt`` is non-zero.

    Raises:
        AssertionError: If any of the constraints above is violated.
    """
    assert (
        id_cnt >= 0 and label_cnt >= 0 and intersection_cnt > 0 and difference_cnt >= 0
    )

    if difference_cnt:
        assert len(difference_id_prefix) > 0

    # The template table is only used for its schema and column names.
    table = create_table(id_cnt, label_cnt)
    column_names = table.schema.names

    # NOTE: `csv` is pyarrow.csv here; the later `import pyarrow.csv as csv`
    # rebinds the name over the standard-library module.
    with csv.CSVWriter(output_path, table.schema) as table_writer:
        # Intersection rows always use the "i" prefix.
        # NOTE(review): a difference_id_prefix of "i" would produce ids that
        # collide with the intersection rows.
        _write_rows(
            table_writer, column_names, id_cnt, label_cnt, intersection_cnt, "i"
        )
        _write_rows(
            table_writer,
            column_names,
            id_cnt,
            label_cnt,
            difference_cnt,
            difference_id_prefix,
        )


def gen_test_data(
    receiver_item_cnt: int,
    sender_item_cnt: int,
    intersection_cnt: int,
    id_cnt: int,
    receiver_label_cnt: int,
    sender_label_cnt: int,
    receiver_path: str,
    sender_path: str,
    intersection_path: str,
    seed: int,
) -> None:
    """Generate the intersection, receiver and sender CSV files.

    The intersection file holds only the shared ids (no labels). The receiver
    and sender files hold the shared ids plus their own extra rows, prefixed
    ``r`` and ``s`` respectively, along with the requested label columns.
    A short summary is printed once the files are written.

    Args:
        receiver_item_cnt: Total receiver rows; must be positive and at least
            ``intersection_cnt``.
        sender_item_cnt: Total sender rows; must be positive and at least
            ``intersection_cnt``.
        intersection_cnt: Rows shared by both parties; must be positive.
        id_cnt: Number of id columns.
        receiver_label_cnt: Receiver label columns; must be non-negative.
        sender_label_cnt: Sender label columns; must be non-negative.
        receiver_path: Output path of the receiver CSV.
        sender_path: Output path of the sender CSV.
        intersection_path: Output path of the intersection CSV.
        seed: Accepted but not read anywhere in this function.

    Raises:
        AssertionError: If the count constraints above are violated.
    """
    counts_ok = (
        receiver_item_cnt > 0
        and sender_item_cnt > 0
        and intersection_cnt > 0
        and receiver_label_cnt >= 0
        and sender_label_cnt >= 0
    )
    assert counts_ok

    assert receiver_item_cnt >= intersection_cnt and sender_item_cnt >= intersection_cnt

    # (label columns, total rows, output path, prefix of the non-shared ids)
    outputs = (
        (0, intersection_cnt, intersection_path, ""),
        (receiver_label_cnt, receiver_item_cnt, receiver_path, "r"),
        (sender_label_cnt, sender_item_cnt, sender_path, "s"),
    )
    for label_cnt, item_cnt, path, prefix in outputs:
        write_table(
            id_cnt,
            label_cnt,
            intersection_cnt,
            item_cnt - intersection_cnt,
            path,
            prefix,
        )

    print("## Test data generated:")
    summary = (
        ("receiver_item_cnt", receiver_item_cnt, receiver_path),
        ("sender_item_cnt", sender_item_cnt, sender_path),
        ("intersection_cnt", intersection_cnt, intersection_path),
    )
    for label, item_cnt, path in summary:
        print(f"{label} = {item_cnt} at {path}.")


if __name__ == "__main__":
    # Command-line entry point: parse the options and generate the CSV files.
    parser = argparse.ArgumentParser(description="psi test data generator.")
    # Item counts are taken as strings so scientific notation such as "1e6"
    # is accepted; they are converted with int(float(...)) below.
    parser.add_argument(
        "--receiver_item_cnt", help="Item cnt for receiver.", default="1e4"
    )
    parser.add_argument("--sender_item_cnt", help="Item cnt for sender.", default="1e4")
    parser.add_argument(
        "--intersection_cnt", help="Item cnt for intersection.", default="8e3"
    )
    parser.add_argument("--id_cnt", help="Id col cnt.", default=1, type=int)
    # The label options previously reused the "Id col cnt." help text.
    parser.add_argument(
        "--receiver_label_cnt",
        help="Label col cnt for receiver.",
        default=0,
        type=int,
    )
    parser.add_argument(
        "--sender_label_cnt", help="Label col cnt for sender.", default=0, type=int
    )
    parser.add_argument(
        "--receiver_path", help="Receiver path.", default="receiver.csv"
    )
    parser.add_argument("--sender_path", help="Sender path.", default="sender.csv")
    parser.add_argument(
        "--intersection_path", help="Intersection path.", default="intersection.csv"
    )
    parser.add_argument("--seed", help="Random seed.", default=0, type=int)

    args = parser.parse_args()

    gen_test_data(
        receiver_item_cnt=int(float(args.receiver_item_cnt)),
        sender_item_cnt=int(float(args.sender_item_cnt)),
        intersection_cnt=int(float(args.intersection_cnt)),
        id_cnt=args.id_cnt,
        receiver_label_cnt=args.receiver_label_cnt,
        sender_label_cnt=args.sender_label_cnt,
        receiver_path=args.receiver_path,
        sender_path=args.sender_path,
        intersection_path=args.intersection_path,
        seed=args.seed,
    )
1 change: 1 addition & 0 deletions examples/psi/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pyarrow
Loading

0 comments on commit e7d45d4

Please sign in to comment.