Skip to content

Commit ac16412

Browse files
committed
wip: add script to get repo/ws trends
1 parent ccabdc3 commit ac16412

1 file changed

Lines changed: 134 additions & 0 deletions

File tree

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Fetch repository and workspace metrics from OSBv2 and plot how their numbers grow over time.
4+
5+
File:
6+
7+
Copyright 2025 Ankur Sinha
8+
Author: Ankur Sinha <sanjay DOT ankur AT gmail DOT com>
9+
"""
10+
11+
import requests
12+
import datetime
13+
from dateutil.relativedelta import relativedelta
14+
import matplotlib.pyplot as plt
15+
import matplotlib
16+
17+
# Listing endpoints of the OSBv2 workspaces API queried by this script.
# The page size is part of the query string; an alternative repositories URL
# with a larger page size is:
# "https://v2.opensourcebrain.org/proxy/workspaces/api/osbrepository?page=1&per_page=4000"
repositories_url = (
    "https://v2.opensourcebrain.org/proxy/workspaces/api/osbrepository?page=1&per_page=40"
)
workspaces_url = "https://v2.opensourcebrain.org/proxy/workspaces/api/workspace?page=1&per_page=1500"
22+
23+
24+
def get_repo_metrics():
    """Get metrics on number of repositories on OSBv2.

    Fetches the repository listing from ``repositories_url``, prints the
    number of repositories together with a break down by content (models
    vs. data) and by repository type, and returns the creation time stamps.

    Returns:
        list of ``datetime.datetime``: creation time stamps of the fetched
        repositories, sorted in ascending order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the response holds fewer (or more) repositories
            than the total reported in its pagination information.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(repositories_url, timeout=60)
    resp.raise_for_status()

    data = resp.json()
    repositories = data["osbrepositories"]
    total_number = data["pagination"]["total"]
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if total_number != len(repositories):
        raise RuntimeError(
            f"Fetched {len(repositories)} of {total_number} repositories; "
            "increase `per_page` in `repositories_url` to fetch all of them"
        )
    print(f">>> Number of repositories: {len(repositories)}")

    model_or_data = {"models": 0, "data": 0}
    # Known types are pre-seeded so that they are reported even when zero.
    repo_types = {"biomodels": 0, "dandi": 0, "github": 0, "figshare": 0}
    creation_time_stamps = []

    for repo in repositories:
        # `.get` so that a repository type not listed above is counted
        # instead of raising KeyError.
        repo_type = repo["repository_type"]
        repo_types[repo_type] = repo_types.get(repo_type, 0) + 1

        # Only the first entry of the content type list is considered.
        if repo["content_types_list"][0] == "modeling":
            model_or_data["models"] += 1
        else:
            model_or_data["data"] += 1

        creation_time_stamps.append(
            datetime.datetime.fromisoformat(repo["timestamp_created"])
        )

    # Chronological order, as returned by get_workspace_metrics as well.
    creation_time_stamps.sort()

    print(">>> Type break down:")
    print(model_or_data)
    print(repo_types)
    # Oldest ten and newest ten time stamps.
    print(creation_time_stamps[0:10])
    print(creation_time_stamps[-10:])
    return creation_time_stamps
56+
57+
58+
def get_workspace_metrics():
    """Get metrics on number of workspaces on OSBv2.

    Fetches the workspace listing from ``workspaces_url``, prints the number
    of workspaces together with a public/private break down, and returns the
    creation time stamps.

    Returns:
        list of ``datetime.datetime``: creation time stamps of the fetched
        workspaces, sorted in ascending order.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        RuntimeError: if the response holds fewer (or more) workspaces than
            the total reported in its pagination information.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    resp = requests.get(workspaces_url, timeout=60)
    resp.raise_for_status()

    data = resp.json()
    workspaces = data["workspaces"]
    total_number = data["pagination"]["total"]
    # Explicit check rather than `assert`, which is stripped under `python -O`.
    if total_number != len(workspaces):
        raise RuntimeError(
            f"Fetched {len(workspaces)} of {total_number} workspaces; "
            "increase `per_page` in `workspaces_url` to fetch all of them"
        )
    print(f">>> Number of workspaces: {len(workspaces)}")

    public_or_private = {"public": 0, "private": 0}
    creation_time_stamps = []

    for ws in workspaces:
        # Workspaces flagged `publicable` are counted as public.
        visibility = "public" if ws["publicable"] else "private"
        public_or_private[visibility] += 1
        creation_time_stamps.append(
            datetime.datetime.fromisoformat(ws["timestamp_created"])
        )

    creation_time_stamps.sort()

    print(">>> Type break down:")
    print(public_or_private)
    # Oldest ten and newest ten time stamps.
    print(creation_time_stamps[0:10])
    print(creation_time_stamps[-10:])
    return creation_time_stamps
89+
90+
91+
def plot_trend(title, creation_time_stamps, timewindow=2):
    """Plot the cumulative number of created items over time.

    The time axis is sampled every `timewindow` months, starting on
    2021-01-01. Each sample point is plotted against the number of items
    created before it; a final point at the current time carries the total.
    The figure is shown interactively with ``plt.show()``.

    Args:
        title: name of the counted items, used in the y axis label.
        creation_time_stamps: ``datetime.datetime`` objects, one per item.
            They do not need to be sorted.
        timewindow: number of months between sample points (and between
            major x axis ticks); must be at least 1.

    Raises:
        ValueError: if `timewindow` is smaller than 1.
    """
    # Made explicit: `import matplotlib` alone does not guarantee that the
    # `matplotlib.dates` submodule is loaded.
    import matplotlib.dates as mdates

    if timewindow < 1:
        # A non-positive step would never let the sample point advance.
        raise ValueError("timewindow must be at least 1 month")

    time_now = datetime.datetime.now()
    trend_start = datetime.datetime(2021, 1, 1, 0, 0, 0)
    step = relativedelta(months=timewindow)

    # Compare via POSIX time stamps, as this works for naive and aware
    # datetimes alike.
    ordered = sorted(creation_time_stamps, key=lambda ts: ts.timestamp())
    total = len(ordered)

    plot_data_x = []
    plot_data_y = []

    trend_point = trend_start
    for created_before, ts in enumerate(ordered):
        # `while`, not `if`: an item may lie several windows after the
        # current sample point. Every sample point passed gets the number of
        # items created before it, which excludes the current item.
        while ts.timestamp() > trend_point.timestamp():
            plot_data_x.append(trend_point)
            plot_data_y.append(created_before)
            trend_point += step

    # All items were created on or before `trend_point` now, so the curve
    # stays flat at the total for the remaining sample points up to now.
    while trend_point.timestamp() < time_now.timestamp():
        plot_data_x.append(trend_point)
        plot_data_y.append(total)
        trend_point += step

    plot_data_x.append(time_now)
    plot_data_y.append(total)

    fig, ax = plt.subplots()
    ax.plot(
        plot_data_x,
        plot_data_y,
    )
    ax.set_xlabel("Date")
    ax.set_ylabel(f"Number of {title}")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.xaxis.set(
        major_locator=mdates.MonthLocator(interval=timewindow),
        major_formatter=mdates.DateFormatter("%Y-%m"),
    )

    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()
128+
129+
130+
if __name__ == "__main__":
    # Repositories first, then workspaces: fetch the creation time stamps of
    # each kind and plot its trend before moving on to the next one.
    for label, fetch_timestamps in (
        ("Repositories", get_repo_metrics),
        ("Workspaces", get_workspace_metrics),
    ):
        plot_trend(title=label, creation_time_stamps=fetch_timestamps())

0 commit comments

Comments
 (0)