본문 바로가기

카테고리 없음

java object find

728x90
import re
import pandas as pd
def reformat_braces(lines):
    """Move braces that share a line with other code onto lines of their own.

    Every input line is stripped of surrounding whitespace. A line that ends
    with ``{`` and has other content is emitted as the content followed by a
    lone ``{``. Otherwise, a line that starts with ``}`` and has other content
    is emitted as a lone ``}`` followed by the remaining content. All other
    lines are emitted stripped and unchanged.

    Only one split is applied per line: the trailing ``{`` check wins, so
    ``} else {`` becomes ``} else`` and ``{``.

    Args:
        lines: iterable of source lines.

    Returns:
        A new list of stripped lines with the braces separated.
    """
    out = []
    for raw in lines:
        text = raw.strip()
        has_other_content = len(text) > 1

        if has_other_content and text.endswith("{"):
            # Statement first, then the opening brace on its own line.
            out.extend((text[:-1].strip(), "{"))
        elif has_other_content and text.startswith("}"):
            # Closing brace on its own line, then whatever followed it.
            out.extend(("}", text[1:].strip()))
        else:
            out.append(text)
    return out
def auto_indent_java_code(lines: list) -> list:
    """Re-indent brace-delimited source lines with four spaces per level.

    Each line is stripped and re-emitted with indentation derived from the
    braces seen so far:

    * a line that starts with ``}`` (``}``, ``} else``, ``} catch (...)``)
      closes a block and is dedented before being written;
    * a line that merely ends with ``}`` also closes a block, unless it starts
      with ``else``, ``catch`` or ``finally`` (e.g. ``else { return; }``);
    * a line that ends with ``{`` indents the lines that follow it.

    The indent level never drops below zero, so a stray closing brace cannot
    shift the rest of the output to the left.

    Args:
        lines: iterable of source lines (not a single string).

    Returns:
        A list with one re-indented line per input line; blank input lines
        become empty strings.
    """
    indent_level = 0
    indent_space = "    "  # 4 spaces for indentation
    # A line beginning with one of these and ending in "}" is a self-contained
    # clause such as "else { ... }", not the end of the enclosing block.
    inline_clause_keywords = ("else", "catch", "finally")
    formatted_lines = []

    for line in lines:
        stripped_line = line.strip()

        # Blank lines are preserved but carry no indentation.
        if not stripped_line:
            formatted_lines.append('')
            continue

        closes_block = stripped_line.startswith("}") or (
            stripped_line.endswith("}")
            and not stripped_line.startswith(inline_clause_keywords)
        )
        if closes_block:
            # Dedent before writing, clamped so unbalanced input cannot make
            # the level negative and under-indent everything after it.
            indent_level = max(indent_level - 1, 0)

        formatted_lines.append(f"{indent_space * indent_level}{stripped_line}")

        # An opening brace at the end of the line indents what follows; this
        # is checked even after a dedent so "} else {" re-opens a block.
        if stripped_line.endswith("{"):
            indent_level += 1

    return formatted_lines


# def find_string_usage(lines, target_string):
#     result = []
    
#     # 1. Check for the String type usage (variable assignment)
#     string_pattern = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"([^"]*)"')
#     for idx, line in enumerate(lines, 1):
#         match = string_pattern.search(line)
#         if match and match.group(2) == target_string:
#             result.append({
#                 "Type": "String",
#                 "Name": match.group(1),
#                 "Value": match.group(2),
#                 "Line": idx
#             })
    
#     # 2. Check for Function type usage (where the string is passed as a value)
#     function_pattern = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]*)\)\s*{')
    
#     # Extended control flow keywords to exclude (includes common flow control statements)
#     control_flow_keywords = {
#         'for', 'if', 'while', 'switch', 'try', 'catch', 'finally', 'do'
#     }
    
#     for idx, line in enumerate(lines, 1):
#         match = function_pattern.search(line)
#         if match:
#             function_name = match.group(1)  # Function name is in the first capture group
            
#             # Skip if the function is a control flow statement (e.g., 'for', 'if', etc.)
#             if function_name in control_flow_keywords:
#                 continue
            
#             arguments = match.group(2)  # Function arguments are in the second capture group
#             if target_string in arguments:
#                 result.append({
#                     "Type": "Function",
#                     "Name": function_name,
#                     "Value": target_string,
#                     "Line": idx
#                 })
    
#     return result
def reformat_java_braces(lines: list) -> str:
    """Fold stand-alone opening braces back onto the preceding line.

    A line consisting only of ``{`` (ignoring surrounding whitespace) is
    removed and `` {`` is appended to the previous output line, after that
    line's trailing whitespace has been trimmed. If there is no previous line
    the brace is emitted as ``{``. Every other line, including a lone ``}``,
    is passed through untouched.

    Args:
        lines: list of source lines.

    Returns:
        The resulting lines joined with newline characters.
    """
    merged = []
    for current in lines:
        if current.strip() != "{":
            merged.append(current)
            continue

        if merged:
            # Attach the brace to the statement that opened the block.
            merged[-1] = merged[-1].rstrip() + " {"
        else:
            # Nothing to attach to: keep the brace as the first line.
            merged.append("{")

    return '\n'.join(merged)

# def find_string_usage(lines, target_string):
#     result = []
    
#     # 1. Check for the String type usage (variable assignment)
#     string_pattern = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*=\s*"([^"]*)"')
#     for idx, line in enumerate(lines, 1):
#         match = string_pattern.search(line)
#         if match and match.group(2) == target_string:
#             print(line)
#             result.append({
#                 "Type": "String",
#                 "Name": match.group(1),
#                 "Value": match.group(2),
#                 "Line": idx
#             })
    
#     # 2. Check for Function type usage (where the string is passed as a value)
#     function_pattern = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]*)\)\s*{')
    
#     # Extended control flow keywords to exclude (includes common flow control statements)
#     control_flow_keywords = {
#         'for', 'if', 'while', 'switch', 'try', 'catch', 'finally', 'do', 'else if', 'else'
#     }
    
#     for idx, line in enumerate(lines, 1):
#         match = function_pattern.search(line)
#         if match:
#             print(line)
#             function_name = match.group(1)  # Function name is in the first capture group
            
#             # Skip if the function is a control flow statement (e.g., 'for', 'if', etc.)
#             if function_name in control_flow_keywords:
#                 continue

#             arguments = match.group(2)  # Function arguments are in the second capture group
#             if target_string in arguments:
#                 result.append({
#                     "Type": "Function",
#                     "Name": function_name,
#                     "Value": target_string,
#                     "Line": idx
#                 })
    
#     return result
def find_functions_and_objects(lines, target_string):
    """Locate string definitions containing ``target_string`` and their uses.

    The scan runs in two passes over ``lines`` (1-based line numbers):

    1. Every ``String <name> = "...target...";`` declaration and every
       ``obj.method("...target...")`` call is recorded as a ``"String"``
       entry. For a declaration the variable name is remembered as an object
       to track; for a method call the target string itself is tracked.
    2. Lines inside a function body that contain one of the tracked objects
       (plain substring test) are recorded as ``"Function"`` entries carrying
       the enclosing function's name.

    A function header is any line matching ``name(...) {`` whose name is not
    a control-flow keyword. Header lines and control-flow lines of that shape
    are never reported as usages. The function body is delimited by counting
    ``{`` and ``}`` characters, so nested blocks do not end the function
    early. Braces inside string or character literals are counted too, which
    can skew the depth on such lines.

    Args:
        lines: iterable of source lines.
        target_string: literal text to look for inside string literals.

    Returns:
        A list of dicts with the keys ``"Type"``, ``"Name"``, ``"Value"`` and
        ``"Line"``; all ``"String"`` entries come first, followed by the
        ``"Function"`` entries in line order.
    """
    result = []
    target_objects = set()  # unique names/values whose usage is tracked

    # Pass 1: find the string definitions that mention target_string.
    string_pattern = re.compile(
    r'String\s+(\w+)\s*=\s*"([^"]*' + re.escape(target_string) + r'[^"]*)";|(\w+\.\w+\s*\(\s*"[^"]*\b' + re.escape(target_string) + r'\b[^"]*"\s*\))'
    )

    for idx, line in enumerate(lines, 1):
        for match in string_pattern.finditer(line):
            declared_name = match.group(1)
            if declared_name is None:
                # Second alternative: the literal is passed straight into a
                # method call, so there is no variable name to track.
                target_objects.add(target_string)
                result.append({
                    "Type": "String",
                    "Name": target_string,
                    "Value": target_string,
                    "Line": idx
                })
            else:
                target_objects.add(declared_name)
                result.append({
                    "Type": "String",
                    "Name": declared_name,
                    "Value": match.group(2),
                    "Line": idx
                })

    # Lines that already produced a "String" entry; built once for O(1) tests.
    string_lines = {entry["Line"] for entry in result}
    # Sorted so the order of entries on a single line is reproducible.
    tracked_objects = sorted(target_objects)

    # Pass 2: attribute usages of the tracked objects to enclosing functions.
    function_pattern = re.compile(r'\b([a-zA-Z_][a-zA-Z0-9_]*)\s*\(([^)]*)\)\s*{')
    control_flow_keywords = {'for', 'if', 'while', 'switch', 'try', 'catch', 'finally', 'do', 'else if', 'else'}

    depth = 0            # current brace nesting depth
    open_functions = []  # stack of (function name, depth before its header)

    for idx, line in enumerate(lines, 1):
        depth_before = depth
        depth += line.count("{") - line.count("}")

        match = function_pattern.search(line)
        if match:
            function_name = match.group(1)
            if function_name not in control_flow_keywords:
                open_functions.append((function_name, depth_before))
        elif open_functions:
            current_function = open_functions[-1][0]
            for obj in tracked_objects:
                if obj in line:
                    result.append({
                        "Type": "Function",
                        "Name": current_function,
                        # On a definition line report the searched string
                        # itself, otherwise the object that was referenced.
                        "Value": target_string if idx in string_lines else obj,
                        "Line": idx
                    })

        # A function ends once the depth falls back to where its header began.
        while open_functions and depth <= open_functions[-1][1]:
            open_functions.pop()

    return result

def _strip_java_comments(source):
    """Remove ``//`` and ``/* */`` comments while leaving literals intact."""
    # String and character literals are matched first so that comment markers
    # inside them (e.g. "jdbc:mysql://host/db") are not mistaken for comments.
    token_pattern = re.compile(
        r'"(?:\\.|[^"\\\n])*"'      # string literal
        r"|'(?:\\.|[^'\\\n])*'"     # character literal
        r"|//[^\n]*"                # line comment (newline is kept)
        r"|/\*.*?\*/",              # block comment, including /** ... */
        re.S,
    )

    def drop_comment(match):
        token = match.group(0)
        # Only the two comment alternatives start with a slash.
        return "" if token.startswith("/") else token

    return token_pattern.sub(drop_comment, source)


def _derived_java_path(file_path, suffix):
    """Return ``file_path`` with ``suffix`` inserted before a trailing ``.java``."""
    extension = ".java"
    # Strip the extension only at the end of the path, so a ".java" occurring
    # in a directory name does not truncate the output location.
    base = file_path[:-len(extension)] if file_path.endswith(extension) else file_path
    return f"{base}{suffix}{extension}"


def extract_string_and_function_details(file_path, search_string):
    """Normalise a Java file and report where ``search_string`` is used.

    The file is read as UTF-8 and processed in stages; each stage is also
    written next to the input for inspection:

    * ``<name>_1.java`` - comments and blank lines removed, braces split onto
      their own lines by ``reformat_braces``;
    * ``<name>_2.java`` - the same lines re-indented by
      ``auto_indent_java_code``;
    * ``<name>_3.java`` - opening braces folded back onto the preceding line
      by ``reformat_java_braces``.

    The usage search runs on the ``_3`` text, so the reported line numbers
    refer to that file rather than to the original source.

    Args:
        file_path: path of the Java source file to analyse.
        search_string: literal text to look for inside string literals.

    Returns:
        The list of usage records produced by ``find_functions_and_objects``.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        source = file.read()

    # Remove comments, then drop lines that are empty or whitespace-only.
    content_no_comments = _strip_java_comments(source)
    non_empty_lines = [line for line in content_no_comments.splitlines() if line.strip()]
    reformatted_lines = reformat_braces(non_empty_lines)

    # Stage 1 output: braces isolated on their own lines.
    with open(_derived_java_path(file_path, "_1"), 'w', encoding='utf-8') as file:
        file.write('\n'.join(reformatted_lines))

    # Stage 2: automatic indentation.
    formatted_lines = auto_indent_java_code(reformatted_lines)

    # TODO: re-detect functions of the form (){}

    with open(_derived_java_path(file_path, "_2"), 'w', encoding='utf-8') as file:
        file.write('\n'.join(formatted_lines))

    # Stage 3: opening braces folded back onto the preceding line.
    last_lines = reformat_java_braces(formatted_lines)
    with open(_derived_java_path(file_path, "_3"), 'w', encoding='utf-8') as file:
        file.write(last_lines)

    usage_info = find_functions_and_objects(last_lines.split('\n'), search_string)
    return usage_info

 

 

실행 영역

 

# Usage example
main_path = "C:/Users/l4rea/Documents/codes/python/project/05_pmutil/java_db/"
file_path = "KNNClassifier.java"  # Java file to analyse, relative to main_path
search_string = "TEST_DB"  # string value to search for

occurrences = extract_string_and_function_details(f"{main_path}{file_path}", search_string)

# Print the results, one record per line
print("Occurrences of search_string:")
for occurrence in occurrences:
    print(occurrence)