728x90
복습하기 위해 학부 수업 내용을 필기한 내용입니다.
이해를 제대로 하지 못하고 정리한 경우 틀린 내용이 있을 수 있습니다.
그러한 부분에 대해서는 알려주시면 정말 감사하겠습니다.
2022.11.22 - [컴퓨터공학/알고리즘2] - [알고리즘2] Undirected and Directed Graphs
▶구현된 API 정리
class Digraph: # Digraph 객체를 저장하는 클래스
def reverse(self): # 이 객체가 나타내는 그래프의 reverse graph 객체 만들어 반환
class DFS: # Digraph 객체에 DFS를 수행하고 결과를 저장하는 클래스
class BFS: # Digraph 객체에 BFS를 수행하고 결과를 저장하는 클래스
def webCrawl(roots, maxDepth=1): # 웹주소의 리스트 roots에서 시작해 링크를 따라 웹주소를 수집하는 함수
def topologicalSort(g): # Digraph 객체 g에 대한 topological order를 구해 반환하는 함수
728x90
▶구현할 API 정리 : class SCC
class SCC:
def __init__(self, g):
# SCC 생성자. Digraph 객체 g에 대한 strongly-connected component 구해
# 멤버 변수 self.id[]와 self.count에 결과 저장
# self.id[v]: v가 속한 component id를 저장하며, component id는 0부터 시작해 1씩 증가
# self.count: component의 수를 저장
#
# (1) g의 reverse 그래프에 대한 topological order 얻기
# (2) 앞에서 얻은 순서에서 아직 방문하지 않은 각 정점 v에 대해:
# v에서 시작해 DFS 혹은 BFS 수행하면서
# 아직 방문하지 않은 모든 정점 w를 방문하되
# 이들을 모두 같은 component로 표시 (즉 self.id[w]에 같은 번호 저장)
def connected(self, v, w):
# v와 w가 서로 연결된 경우 (같은 SCC에 속하는 경우) True 반환하고
# 그렇지 않으면 False를 반환
#
# __init__()를 수행한 결과가 저장된 self.id[v]와 self.id[w] 활용해 결과값
▶그 외 유의사항
SCC 클래스의 멤버 변수에 대한 아래 조건을 지켜야 한다.
self.count는 component의 수를 저장한다.
self.id[v]는 정점 v가 속한 component의 id를 저장하며 component id는 0 ~ (self.count-1) 범위에 속한 정수여야 한다.
그때까지 찾은 SCC의 count를 id로 사용하면 위 조건 만족한다. (UndirectedGraph.py의 CC 클래스 참조)
또한 수행 시간에 대한 다음 조건을 지켜야 한다.
connected(v, w) 함수의 수행 시간은 상수 시간이어야 하며, 정점 개수 V나 간선 개수 E에 비례해서는 안 된다.
즉 SCC 탐색은 SCC 클래스 객체 생성 시 한 번 수행해야 하며 그 후에 connected()를 호출할 때마다 수행하면 안된다.
▶코드
import requests # For reading web pages
import re # For searching with regular expressions
import timeit
from queue import Queue
'''
Class for storing directed graphs
'''
class Digraph:
def __init__(self, V): # Constructor
self.V = V # Number of vertices
self.E = 0 # Number of edges
self.adj = [[] for _ in range(V)] # adj[v] is a list of vertices pointed from v
def addEdge(self, v, w): # Add a directed edge v->w. Self-loops and parallel edges are allowed
self.adj[v].append(w)
self.E += 1
def outDegree(self, v):
return len(self.adj[v])
def __str__(self):
rtList = [f"{self.V} vertices and {self.E} edges\n"]
for v in range(self.V):
for w in self.adj[v]:
rtList.append(f"{v}->{w}\n")
return "".join(rtList)
def reverse(self): # return a digraph with all edges reversed
g = Digraph(self.V)
for v in range(self.V):
for w in self.adj[v]: g.addEdge(w, v)
return g
'''
Class for storing the results of depth-first search
'''
class DFS:
# Constructor
# Perform DFS on graph g starting from the source vertex s
def __init__(self, g, s):
def recur(v):
self.visited[v] = True
for w in g.adj[v]:
if not self.visited[w]:
recur(w)
self.fromVertex[w] = v
assert (isinstance(g, Digraph) and s >= 0 and s < g.V)
self.g, self.s = g, s
self.visited = [False for _ in range(g.V)]
self.fromVertex = [None for _ in range(g.V)]
recur(s)
# Return a list of vertices on the path from s to v
# based on the results of DFS
def pathTo(self, v):
if not self.visited[v]: return None
path = []
while v != self.s:
path.append(v)
v = self.fromVertex[v]
path.append(self.s)
path.reverse()
return path
def hasPathTo(self, v):
return self.visited[v]
# This function is used to evaluate the speed of SCC class
#
def DFSforEvaluation(g):
def recur(v):
visited[v] = True
for w in g.adj[v]:
if not visited[w]:
recur(w)
fromVertex[w] = v
assert (isinstance(g, Digraph))
visited = [False for _ in range(g.V)]
fromVertex = [None for _ in range(g.V)]
for v in range(g.V):
if not visited[v]:
recur(v)
return visited, fromVertex
'''
Class for storing the results of breadth-first search
'''
class BFS:
# Constructor
# PerformBDFS on graph g starting from the source vertex s
def __init__(self, g, s):
assert (isinstance(g, Digraph) and s >= 0 and s < g.V)
self.g, self.s = g, s
self.visited = [False for _ in range(g.V)]
self.fromVertex = [None for _ in range(g.V)]
self.distance = [None for _ in range(g.V)]
queue = Queue()
queue.put(s)
self.visited[s] = True
self.distance[s] = 0
while queue.qsize() > 0:
v = queue.get()
for w in g.adj[v]:
if not self.visited[w]:
queue.put(w)
self.visited[w] = True
self.fromVertex[w] = v
self.distance[w] = self.distance[v] + 1
# Return a list of vertices on the path from s to v
# based on the results of DFS
def pathTo(self, v):
if not self.visited[v]: return None
path = []
while v != self.s:
path.append(v)
v = self.fromVertex[v]
path.append(self.s)
path.reverse()
return path
def hasPathTo(self, v):
return self.visited[v]
def distTo(self, v):
return self.distance[v]
'''
Discover web addresses through BFS, starting from root addresses as the source set
roots: list of web addresses to use as the source set
'''
webaddrPattern = re.compile(
"https://(?:\\w+\\.)+(?:\\w+)") # Regex pattern for a web address. '?:' indicates a non-capturing group
def webCrawl(roots, maxDepth=1):
queue = Queue()
discovered = {} # Symbol table of discovered sites and depth, where depth is the distance from sources
for v in roots:
queue.put(v)
discovered[v] = 0
while queue.qsize() > 0:
v = queue.get()
depth = discovered[v]
if depth > maxDepth: break
try:
resp = requests.get(v) # Receive response by visiting the web site at v
if resp.status_code == 200: # HTTP status code '200 OK' means the page was successfully retrieved
print(v, f"(depth={depth})")
for w in webaddrPattern.findall(resp.text):
if w not in discovered:
discovered[w] = depth + 1
queue.put(w)
except requests.exceptions.ConnectionError as error:
pass
'''
Perform the topological sort on a DAG g, and return list of vertices in reverse DFS postorder
'''
def topologicalSort(g):
def recur(v):
visited[v] = True
for w in g.adj[v]:
if not visited[w]: recur(w)
reverseList.append(v) # Add v to the stack if all adjacent vertices were visited
assert (isinstance(g, Digraph))
visited = [False for _ in range(g.V)]
reverseList = []
for v in range(g.V):
if not visited[v]: recur(v)
reverseList.reverse()
return reverseList
'''
Class that finds SCC (Strongly-Connected Components) and stores the results
'''
class SCC:
def __init__(self, g): # Do strongly-connected-components pre-processing, based on Kosaraju-Sharir algorithm
def recur(v):
self.id[v] = self.count
for w in g.adj[v]:
if self.id[w] < 0:
recur(w)
top_g = topologicalSort(g.reverse())
# g = g.reverse()
self.id = [-1 for i in range(g.V)]
self.count = 0
for v in top_g:
if self.id[v] < 0:
recur(v)
self.count += 1
def connected(self, v, w): # Are v and w connected?
return self.id[v] == self.id[w]
if __name__ == "__main__":
'''
# Unit test for Digraph
g = Digraph(13)
g.addEdge(0,1)
g.addEdge(0,5)
g.addEdge(2,0)
g.addEdge(2,3)
g.addEdge(3,2)
g.addEdge(3,5)
g.addEdge(4,2)
g.addEdge(4,3)
g.addEdge(5,4)
g.addEdge(6,0)
g.addEdge(6,4)
g.addEdge(6,8)
g.addEdge(6,9)
g.addEdge(7,6)
g.addEdge(7,9)
g.addEdge(8,6)
g.addEdge(9,10)
g.addEdge(9,11)
g.addEdge(10,12)
g.addEdge(11,4)
g.addEdge(11,12)
g.addEdge(12,9)
print(g)
print(g.adj[0], g.outDegree(0))
print(g.adj[5], g.outDegree(5))
print(g.adj[9], g.outDegree(9))
gr = g.reverse()
print(gr)
print(gr.adj[0], gr.outDegree(0))
print(gr.adj[5], gr.outDegree(5))
print(gr.adj[9], gr.outDegree(9))
dfs = DFS(g,0)
print(dfs.visited, dfs.fromVertex)
print(dfs.pathTo(0))
print(dfs.pathTo(1))
print(dfs.pathTo(2))
print(dfs.pathTo(3))
print(dfs.pathTo(4))
print(dfs.pathTo(5))
print(dfs.pathTo(6))
print(dfs.pathTo(7))
print(dfs.hasPathTo(6))
print(dfs.hasPathTo(7))
bfs = BFS(g,0)
print(bfs.visited, bfs.fromVertex)
print(bfs.pathTo(0), bfs.distTo(0))
print(bfs.pathTo(1), bfs.distTo(1))
print(bfs.pathTo(2), bfs.distTo(2))
print(bfs.pathTo(3), bfs.distTo(3))
print(bfs.pathTo(4), bfs.distTo(4))
print(bfs.pathTo(5), bfs.distTo(5))
print(bfs.pathTo(6), bfs.distTo(6))
print(bfs.pathTo(7), bfs.distTo(7))
print(dfs.hasPathTo(6))
print(dfs.hasPathTo(7))
'''
'''
# Unit test for Web crawling
#resp = requests.get("https://www.naver.com/")
#print(resp.text)
webCrawl(["https://www.naver.com/", "https://www.daum.net/"])
'''
'''
# Unit test for topological Sort
tasks = Digraph(7)
tasks.addEdge(0,1)
tasks.addEdge(0,2)
tasks.addEdge(0,5)
tasks.addEdge(1,4)
tasks.addEdge(3,2)
tasks.addEdge(3,4)
tasks.addEdge(3,5)
tasks.addEdge(3,6)
tasks.addEdge(5,2)
tasks.addEdge(6,0)
tasks.addEdge(6,4)
print(topologicalSort(tasks))
g5 = Digraph(5)
g5.addEdge(0,1)
g5.addEdge(1,3)
g5.addEdge(1,2)
g5.addEdge(2,3)
g5.addEdge(3,4)
g5.addEdge(2,4)
print("g5, topological order", topologicalSort(g5))
print("g5r, topological order", topologicalSort(g5.reverse()))
'''
# Unit test for Kosaraju-Sharir for Finding Strongly-Connected Components
g1 = Digraph(6)
g1.addEdge(0, 1)
g1.addEdge(1, 2)
g1.addEdge(2, 4)
g1.addEdge(3, 5)
g1.addEdge(4, 0)
g1.addEdge(4, 1)
g1.addEdge(5, 3)
g1.addEdge(5, 4)
scc1 = SCC(g1)
print("scc1.count, scc1.id", scc1.count, scc1.id)
if scc1.count == 2:
print("pass")
else:
print("fail")
if all([idx >= 0 and idx < scc1.count for idx in scc1.id]):
print("pass")
else:
print("fail")
print("scc1.connected(1,4)", scc1.connected(1, 4))
if scc1.connected(1, 4):
print("pass")
else:
print("fail")
print("scc1.connected(2,5)", scc1.connected(2, 5))
if not scc1.connected(2, 5):
print("pass")
else:
print("fail")
print("scc1.connected(3,5)", scc1.connected(3, 5))
if scc1.connected(3, 5):
print("pass")
else:
print("fail")
print()
g2 = Digraph(7)
g2.addEdge(0, 1)
g2.addEdge(1, 2)
g2.addEdge(1, 3)
g2.addEdge(2, 3)
g2.addEdge(2, 4)
g2.addEdge(3, 4)
g2.addEdge(5, 6)
g2.addEdge(6, 2)
g2.addEdge(6, 5)
scc2 = SCC(g2)
print("scc2.count, scc2.id", scc2.count, scc2.id)
print("scc2.connected(0,2)", scc2.connected(0, 2))
print("scc2.connected(2,4)", scc2.connected(2, 4))
print("scc2.connected(4,5)", scc2.connected(4, 5))
print("scc2.connected(5,6)", scc2.connected(5, 6))
print()
g3 = Digraph(13)
g3.addEdge(0, 1)
g3.addEdge(0, 5)
g3.addEdge(2, 0)
g3.addEdge(2, 3)
g3.addEdge(3, 2)
g3.addEdge(3, 5)
g3.addEdge(4, 2)
g3.addEdge(4, 3)
g3.addEdge(5, 4)
g3.addEdge(6, 0)
g3.addEdge(6, 4)
g3.addEdge(6, 8)
g3.addEdge(6, 9)
g3.addEdge(7, 6)
g3.addEdge(7, 9)
g3.addEdge(8, 6)
g3.addEdge(9, 10)
g3.addEdge(9, 11)
g3.addEdge(10, 12)
g3.addEdge(11, 4)
g3.addEdge(11, 12)
g3.addEdge(12, 9)
print("topologicalSort(g3.reverse())", topologicalSort(g3.reverse()))
print("topologicalSort(g3)", topologicalSort(g3))
scc3 = SCC(g3)
print("scc3.count, scc3.id", scc3.count, scc3.id)
print("scc3.connected(0,3)", scc3.connected(0, 3))
print("scc3.connected(0,7)", scc3.connected(0, 7))
print("scc3.connected(0,9)", scc3.connected(0, 9))
print("scc3.connected(7,8)", scc3.connected(7, 8))
print("scc3.connected(7,11)", scc3.connected(7, 11))
print("scc3.connected(10,12)", scc3.connected(10, 12))
print()
# Unit test for speed
n = 10000
tSCC = timeit.timeit(lambda: scc3.connected(4, 5), number=n) / n
tDFS = timeit.timeit(lambda: DFSforEvaluation(g3), number=n) / n
print(
f"{n} calls of connected() on g3 took {tSCC:.10f} sec on average, and the same number of calls of DFS() took {tDFS:.10f} sec on average")
if tSCC * 10 < tDFS:
print("pass")
else:
print("fail")
728x90
'컴퓨터공학 > 알고리즘2' 카테고리의 다른 글
[알고리즘2] Minimum Spanning Tree(MST) (0) | 2022.12.08 |
---|---|
[알고리즘2] Cycle Detection and WordNet (0) | 2022.11.26 |
[알고리즘2] Undirected and Directed Graphs (1) | 2022.11.22 |
[알고리즘2] Slider Puzzle 구현 with PQ - 실습 (0) | 2022.10.21 |
[알고리즘2] Priority Queue (0) | 2022.10.21 |