컴퓨터공학/알고리즘2

[알고리즘2] DirectedGraph에 SCC 구현 - 실습

NIMHO 2022. 11. 22. 14:00
728x90

복습하기 위해 학부 수업 내용을 필기한 내용입니다.
이해를 제대로 하지 못하고 정리한 경우 틀린 내용이 있을 수 있습니다.
그러한 부분에 대해서는 알려주시면 정말 감사하겠습니다.

2022.11.22 - [컴퓨터공학/알고리즘2] - [알고리즘2] Undirected and Directed Graphs

 

[알고리즘2] Undirected and Directed Graphs

복습하기 위해 학부 수업 내용을 필기한 내용입니다. 이해를 제대로 하지 못하고 정리한 경우 틀린 내용이 있을 수 있습니다. 그러한 부분에 대해서는 알려주시면 정말 감사하겠습니다. ▶Content

dhalsdl12.tistory.com

구현된 API 정리

class Digraph: # Digraph 객체를 저장하는 클래스
	def reverse(self): # 이 객체가 나타내는 그래프의 reverse graph 객체 만들어 반환
    
class DFS: # Digraph 객체에 DFS를 수행하고 결과를 저장하는 클래스

class BFS: # Digraph 객체에 BFS를 수행하고 결과를 저장하는 클래스

def webCrawl(roots, maxDepth=1): # 웹주소의 리스트 roots에서 시작해 링크를 따라 웹주소를 수집하는 함수

def topologicalSort(g): # Digraph 객체 g에 대한 topological order를 구해 반환하는 함수
728x90

구현할 API 정리 : class SCC

class SCC:
	def __init__(self, g):
	# SCC 생성자. Digraph 객체 g에 대한 strongly-connected component 구해
	# 멤버 변수 self.id[]와 self.count에 결과 저장
	# self.id[v]: v가 속한 component id를 저장하며, component id는 0부터 시작해 1씩 증가
	# self.count: component의 수를 저장
	#
	# (1) g의 reverse 그래프에 대한 topological order 얻기
	# (2) 앞에서 얻은 순서에서 아직 방문하지 않은 각 정점 v에 대해:
	# v에서 시작해 DFS 혹은 BFS 수행하면서
	# 아직 방문하지 않은 모든 정점 w를 방문하되
	# 이들을 모두 같은 component로 표시 (즉 self.id[w]에 같은 번호 저장)
	
    def connected(self, v, w):
	# v와 w가 서로 연결된 경우 (같은 SCC에 속하는 경우) True 반환하고
	# 그렇지 않으면 False를 반환
	# 
	# __init__()를 수행한 결과가 저장된 self.id[v]와 self.id[w] 활용해 결과값

 

그 외 유의사항

SCC 클래스의 멤버 변수에 대한 아래 조건을 지켜야 한다.

 

self.count는 component의 수를 저장한다.

 

self.id[v]는 정점 v가 속한 component의 id를 저장하며 component id는 0 ~ (self.count-1) 범위에 속한 정수여야 한다.

그때까지 찾은 SCC의 count를 id로 사용하면 위 조건 만족한다. (UndirectedGraph.py의 CC 클래스 참조)

 

또한 수행 시간에 대한 다음 조건을 지켜야 한다.

connected(v, w) 함수의 수행 시간은 상수 시간이어야 하며, 정점 개수 V나 간선 개수 E에 비례해서는 안 된다.

즉 SCC 탐색은 SCC 클래스 객체 생성 시 한 번 수행해야 하며 그 후에 connected()를 호출할 때마다 수행하면 안된다.

코드

import requests  # For reading web pages
import re  # For searching with regular expressions
import timeit
from queue import Queue

'''
Class for storing directed graphs
'''
class Digraph:
    def __init__(self, V):  # Constructor
        self.V = V  # Number of vertices
        self.E = 0  # Number of edges
        self.adj = [[] for _ in range(V)]  # adj[v] is a list of vertices pointed from v

    def addEdge(self, v, w):  # Add a directed edge v->w. Self-loops and parallel edges are allowed
        self.adj[v].append(w)
        self.E += 1

    def outDegree(self, v):
        return len(self.adj[v])

    def __str__(self):
        rtList = [f"{self.V} vertices and {self.E} edges\n"]
        for v in range(self.V):
            for w in self.adj[v]:
                rtList.append(f"{v}->{w}\n")
        return "".join(rtList)

    def reverse(self):  # return a digraph with all edges reversed
        g = Digraph(self.V)
        for v in range(self.V):
            for w in self.adj[v]: g.addEdge(w, v)
        return g


'''
Class for storing the results of depth-first search
'''
class DFS:
    # Constructor
    # Perform DFS on graph g starting from the source vertex s
    def __init__(self, g, s):
        def recur(v):
            self.visited[v] = True
            for w in g.adj[v]:
                if not self.visited[w]:
                    recur(w)
                    self.fromVertex[w] = v

        assert (isinstance(g, Digraph) and s >= 0 and s < g.V)
        self.g, self.s = g, s
        self.visited = [False for _ in range(g.V)]
        self.fromVertex = [None for _ in range(g.V)]
        recur(s)

        # Return a list of vertices on the path from s to v

    #     based on the results of DFS
    def pathTo(self, v):
        if not self.visited[v]: return None
        path = []
        while v != self.s:
            path.append(v)
            v = self.fromVertex[v]
        path.append(self.s)
        path.reverse()
        return path

    def hasPathTo(self, v):
        return self.visited[v]


# This function is used to evaluate the speed of SCC class
#
def DFSforEvaluation(g):
    def recur(v):
        visited[v] = True
        for w in g.adj[v]:
            if not visited[w]:
                recur(w)
                fromVertex[w] = v

    assert (isinstance(g, Digraph))
    visited = [False for _ in range(g.V)]
    fromVertex = [None for _ in range(g.V)]
    for v in range(g.V):
        if not visited[v]:
            recur(v)

    return visited, fromVertex


'''
Class for storing the results of breadth-first search
'''
class BFS:
    # Constructor
    # PerformBDFS on graph g starting from the source vertex s
    def __init__(self, g, s):
        assert (isinstance(g, Digraph) and s >= 0 and s < g.V)
        self.g, self.s = g, s
        self.visited = [False for _ in range(g.V)]
        self.fromVertex = [None for _ in range(g.V)]
        self.distance = [None for _ in range(g.V)]
        queue = Queue()
        queue.put(s)
        self.visited[s] = True
        self.distance[s] = 0
        while queue.qsize() > 0:
            v = queue.get()
            for w in g.adj[v]:
                if not self.visited[w]:
                    queue.put(w)
                    self.visited[w] = True
                    self.fromVertex[w] = v
                    self.distance[w] = self.distance[v] + 1

    # Return a list of vertices on the path from s to v
    #     based on the results of DFS
    def pathTo(self, v):
        if not self.visited[v]: return None
        path = []
        while v != self.s:
            path.append(v)
            v = self.fromVertex[v]
        path.append(self.s)
        path.reverse()
        return path

    def hasPathTo(self, v):
        return self.visited[v]

    def distTo(self, v):
        return self.distance[v]


'''
Discover web addresses through BFS, starting from root addresses as the source set
    roots: list of web addresses to use as the source set
'''
webaddrPattern = re.compile(
    "https://(?:\\w+\\.)+(?:\\w+)")  # Regex pattern for a web address. '?:' indicates a non-capturing group


def webCrawl(roots, maxDepth=1):
    queue = Queue()
    discovered = {}  # Symbol table of discovered sites and depth, where depth is the distance from sources
    for v in roots:
        queue.put(v)
        discovered[v] = 0

    while queue.qsize() > 0:
        v = queue.get()
        depth = discovered[v]
        if depth > maxDepth: break

        try:
            resp = requests.get(v)  # Receive response by visiting the web site at v
            if resp.status_code == 200:  # HTTP status code '200 OK' means the page was successfully retrieved
                print(v, f"(depth={depth})")
                for w in webaddrPattern.findall(resp.text):
                    if w not in discovered:
                        discovered[w] = depth + 1
                        queue.put(w)
        except requests.exceptions.ConnectionError as error:
            pass


'''
Perform the topological sort on a DAG g, and return list of vertices in reverse DFS postorder
'''
def topologicalSort(g):
    def recur(v):
        visited[v] = True
        for w in g.adj[v]:
            if not visited[w]: recur(w)
        reverseList.append(v)  # Add v to the stack if all adjacent vertices were visited

    assert (isinstance(g, Digraph))
    visited = [False for _ in range(g.V)]
    reverseList = []
    for v in range(g.V):
        if not visited[v]: recur(v)

    reverseList.reverse()
    return reverseList


'''
Class that finds SCC (Strongly-Connected Components) and stores the results    
'''
class SCC:
    def __init__(self, g):  # Do strongly-connected-components pre-processing, based on Kosaraju-Sharir algorithm
        def recur(v):
            self.id[v] = self.count
            for w in g.adj[v]:
                if self.id[w] < 0:
                    recur(w)

        top_g = topologicalSort(g.reverse())
        # g = g.reverse()
        self.id = [-1 for i in range(g.V)]
        self.count = 0
        for v in top_g:
            if self.id[v] < 0:
                recur(v)
                self.count += 1

    def connected(self, v, w):  # Are v and w connected?
        return self.id[v] == self.id[w]


if __name__ == "__main__":
    '''
    # Unit test for Digraph
    g = Digraph(13)
    g.addEdge(0,1)
    g.addEdge(0,5)
    g.addEdge(2,0)
    g.addEdge(2,3)
    g.addEdge(3,2)
    g.addEdge(3,5)
    g.addEdge(4,2)
    g.addEdge(4,3)
    g.addEdge(5,4)
    g.addEdge(6,0)
    g.addEdge(6,4)
    g.addEdge(6,8)
    g.addEdge(6,9)
    g.addEdge(7,6)
    g.addEdge(7,9)
    g.addEdge(8,6)
    g.addEdge(9,10)
    g.addEdge(9,11)
    g.addEdge(10,12)
    g.addEdge(11,4)
    g.addEdge(11,12)
    g.addEdge(12,9)
    print(g)    
    print(g.adj[0], g.outDegree(0))    
    print(g.adj[5], g.outDegree(5))
    print(g.adj[9], g.outDegree(9))

    gr = g.reverse()
    print(gr)
    print(gr.adj[0], gr.outDegree(0))    
    print(gr.adj[5], gr.outDegree(5))
    print(gr.adj[9], gr.outDegree(9))

    dfs = DFS(g,0)
    print(dfs.visited, dfs.fromVertex)
    print(dfs.pathTo(0))
    print(dfs.pathTo(1))
    print(dfs.pathTo(2))
    print(dfs.pathTo(3))
    print(dfs.pathTo(4))
    print(dfs.pathTo(5))
    print(dfs.pathTo(6))
    print(dfs.pathTo(7))
    print(dfs.hasPathTo(6))
    print(dfs.hasPathTo(7))

    bfs = BFS(g,0)
    print(bfs.visited, bfs.fromVertex)
    print(bfs.pathTo(0), bfs.distTo(0))
    print(bfs.pathTo(1), bfs.distTo(1))
    print(bfs.pathTo(2), bfs.distTo(2))
    print(bfs.pathTo(3), bfs.distTo(3))
    print(bfs.pathTo(4), bfs.distTo(4))
    print(bfs.pathTo(5), bfs.distTo(5))
    print(bfs.pathTo(6), bfs.distTo(6))
    print(bfs.pathTo(7), bfs.distTo(7))
    print(dfs.hasPathTo(6))
    print(dfs.hasPathTo(7))
    '''

    '''
    # Unit test for Web crawling
    #resp = requests.get("https://www.naver.com/")
    #print(resp.text)
    webCrawl(["https://www.naver.com/", "https://www.daum.net/"])
    '''

    '''
    # Unit test for topological Sort
    tasks = Digraph(7)        
    tasks.addEdge(0,1)
    tasks.addEdge(0,2)
    tasks.addEdge(0,5)
    tasks.addEdge(1,4)    
    tasks.addEdge(3,2)
    tasks.addEdge(3,4)
    tasks.addEdge(3,5)
    tasks.addEdge(3,6)
    tasks.addEdge(5,2)
    tasks.addEdge(6,0)   
    tasks.addEdge(6,4)    
    print(topologicalSort(tasks))

    g5 = Digraph(5)
    g5.addEdge(0,1)
    g5.addEdge(1,3)
    g5.addEdge(1,2)
    g5.addEdge(2,3)
    g5.addEdge(3,4)
    g5.addEdge(2,4)
    print("g5, topological order", topologicalSort(g5))
    print("g5r, topological order", topologicalSort(g5.reverse()))
    '''

    # Unit test for Kosaraju-Sharir for Finding Strongly-Connected Components
    g1 = Digraph(6)
    g1.addEdge(0, 1)
    g1.addEdge(1, 2)
    g1.addEdge(2, 4)
    g1.addEdge(3, 5)
    g1.addEdge(4, 0)
    g1.addEdge(4, 1)
    g1.addEdge(5, 3)
    g1.addEdge(5, 4)
    scc1 = SCC(g1)
    print("scc1.count, scc1.id", scc1.count, scc1.id)
    if scc1.count == 2:
        print("pass")
    else:
        print("fail")
    if all([idx >= 0 and idx < scc1.count for idx in scc1.id]):
        print("pass")
    else:
        print("fail")
    print("scc1.connected(1,4)", scc1.connected(1, 4))
    if scc1.connected(1, 4):
        print("pass")
    else:
        print("fail")
    print("scc1.connected(2,5)", scc1.connected(2, 5))
    if not scc1.connected(2, 5):
        print("pass")
    else:
        print("fail")
    print("scc1.connected(3,5)", scc1.connected(3, 5))
    if scc1.connected(3, 5):
        print("pass")
    else:
        print("fail")
    print()

    g2 = Digraph(7)
    g2.addEdge(0, 1)
    g2.addEdge(1, 2)
    g2.addEdge(1, 3)
    g2.addEdge(2, 3)
    g2.addEdge(2, 4)
    g2.addEdge(3, 4)
    g2.addEdge(5, 6)
    g2.addEdge(6, 2)
    g2.addEdge(6, 5)
    scc2 = SCC(g2)
    print("scc2.count, scc2.id", scc2.count, scc2.id)
    print("scc2.connected(0,2)", scc2.connected(0, 2))
    print("scc2.connected(2,4)", scc2.connected(2, 4))
    print("scc2.connected(4,5)", scc2.connected(4, 5))
    print("scc2.connected(5,6)", scc2.connected(5, 6))
    print()

    g3 = Digraph(13)
    g3.addEdge(0, 1)
    g3.addEdge(0, 5)
    g3.addEdge(2, 0)
    g3.addEdge(2, 3)
    g3.addEdge(3, 2)
    g3.addEdge(3, 5)
    g3.addEdge(4, 2)
    g3.addEdge(4, 3)
    g3.addEdge(5, 4)
    g3.addEdge(6, 0)
    g3.addEdge(6, 4)
    g3.addEdge(6, 8)
    g3.addEdge(6, 9)
    g3.addEdge(7, 6)
    g3.addEdge(7, 9)
    g3.addEdge(8, 6)
    g3.addEdge(9, 10)
    g3.addEdge(9, 11)
    g3.addEdge(10, 12)
    g3.addEdge(11, 4)
    g3.addEdge(11, 12)
    g3.addEdge(12, 9)
    print("topologicalSort(g3.reverse())", topologicalSort(g3.reverse()))
    print("topologicalSort(g3)", topologicalSort(g3))

    scc3 = SCC(g3)
    print("scc3.count, scc3.id", scc3.count, scc3.id)
    print("scc3.connected(0,3)", scc3.connected(0, 3))
    print("scc3.connected(0,7)", scc3.connected(0, 7))
    print("scc3.connected(0,9)", scc3.connected(0, 9))
    print("scc3.connected(7,8)", scc3.connected(7, 8))
    print("scc3.connected(7,11)", scc3.connected(7, 11))
    print("scc3.connected(10,12)", scc3.connected(10, 12))
    print()

    # Unit test for speed
    n = 10000
    tSCC = timeit.timeit(lambda: scc3.connected(4, 5), number=n) / n
    tDFS = timeit.timeit(lambda: DFSforEvaluation(g3), number=n) / n
    print(
        f"{n} calls of connected() on g3 took {tSCC:.10f} sec on average, and the same number of calls of DFS() took {tDFS:.10f} sec on average")
    if tSCC * 10 < tDFS:
        print("pass")
    else:
        print("fail")
728x90